function [ X, header ] = pnExtractData(data, rule, T, fields)
%PNEXTRACTDATA Extract data using the given rule for a given window
%   [ X ] = pnExtractData(data, rule, T) extracts data according to the
%   rule given, restricted to the time window T. rule is a choice of what
%   data to extract (e.g., 'max'), and T is a 2 element vector with the
%   minimum and maximum time values to consider (inclusive). If T has one
%   element, only measurements at exactly that time value are considered.
%   If T has more than two elements, only the first two are used.
%
%   [ X ] = pnExtractData(data, rule, T, fields) limits the data
%   extracted to the given fields. X still has one column group for every
%   feature present in data; features not listed in fields are left NaN.
%
%   [ X, header ] = pnExtractData( ... ) also outputs a header vector for
%   the data matrix X.
%
%   Inputs:
%       data   - Cell array of data.
%                   Column 1 - Subject IDs
%                   Column 2 - Time stamp vectors for each subject
%                   Column 3 - Feature name vectors for each subject
%                   Column 4 - Data value vectors for each subject
%
%       rule   - String (or cell array of strings) specifying the rule(s)
%                to use when selecting data. Defaults to 'first'.
%                   max, highest  - Maximum value
%                   min, lowest   - Minimum value
%                   mean, average - Mean value
%                   median        - Median value
%                   sum           - Sum of values
%                   first         - First measurement recorded
%                   last          - Last measurement recorded
%                Any other string falls back to the first measurement
%                (the string itself is still used in the header).
%
%       T      - Window used - 1 or 2 element vector. Defaults to
%                [0,2880] when omitted, empty, or not numeric.
%
%       fields - String or cell array of strings naming the features to
%                keep. When omitted or empty, all features are used.
%
%   Outputs:
%       X      - Data, Nx(D*R), where N is the number of observations
%                (rows of data), D is the number of distinct features in
%                data and R is the number of rules. Columns are ordered
%                feature 1 rule 1, feature 1 rule 2, ..., feature D rule R.
%                Entries with no measurement in the window are NaN.
%       header - 1x(D*R) cell array of strings, one per column of X,
%                built as [feature name, capitalized rule, window string].
%                The window string is empty for the default [0,2880]
%                window, the single time value for a 1 element T, and
%                '<min>to<max>' otherwise.
%
%   Example
%       bpath = './set-a/';
%       data = pnLoadTextFilesCell(bpath);
%       X = pnExtractData(data,'min',[0 2880]); % extract minimum value across 2 days
%
%   See also PNGENERATEFEATURES

%   References:
%       Physionet Challenge 2012

%   Copyright 2012 Alistair Johnson

%   $LastChangedBy: alistair $
%   $LastChangedDate: 2012-08-23 18:39:12 -0400 (Thu, 23 Aug 2012) $
%   $Revision: 152 $
%   Originally written on GLNXA64 by Alistair Johnson, 15-May-2012 15:08:24
%   Contact: alistairewj@gmail.com

if nargin<1
    X = [];
    header = {}; % keep the second output defined for [X,header] callers
    return;
end

%=== Normalize rule into a column cell array of lower-case strings
if nargin<2 || isempty(rule)
    rule = {'first'};
elseif ischar(rule)
    rule = {lower(rule)};
elseif iscell(rule)
    rule = lower(rule(:));
else
    rule = {'first'};
end

if nargin<3 || ~isnumeric(T) || isempty(T)
    T = [0,2880];
end

%=== Feature names present anywhere in the data (sorted, unique)
feats = unique(vertcat(data{:,3}));

if nargin<4 || isempty(fields)
    %=== Use all features
    data_used = data;
else
    %=== Extract only given fields
    if ischar(fields)
        fields = {fields}; % encapsulate in cell array of strings
    end
    fields = sort(fields(:));

    % Compare sizes first: strcmp on two cell arrays requires equal sizes.
    if numel(fields)~=numel(feats) || any(~strcmp(feats(:),fields))
        %=== Requested fields differ from the features in the data
        %=== Preallocate
        data_used = cell(size(data));

        %=== Loop through fields and input them into data_used
        for k=1:numel(fields)
            data_temp = pnExtractField(data,fields{k});
            data_used = pnImputeField(data_used,data_temp);
        end
    else
        %=== fields is exactly the feature list: skip extract/impute
        data_used = data;
    end
end

%=== Use rule to set evaluation function
R = numel(rule);
rfcn = cell(1,R);
for r=1:R
    switch rule{r}
        case {'min','lowest'}
            rfcn{r} = @min;
            rule{r} = 'min';
        case {'max','highest'}
            rfcn{r} = @max;
            rule{r} = 'max';
        case 'median'
            rfcn{r} = @median;
        case {'mean','average'}
            rfcn{r} = @mean;
            rule{r} = 'mean';
        case 'first'
            rfcn{r} = @(x) x(1);
        case 'last'
            rfcn{r} = @(x) x(end);
        case 'sum'
            rfcn{r} = @sum;
        otherwise % default first value
            rfcn{r} = @(x) x(1);
    end
end

%=== Check window
if numel(T)==1
    windowFcn = @(x,win) x==win;
else
    if numel(T)>2
        T = T(1:2); % only the first two elements define the window
    end
    T = sort(T);
    windowFcn = @(x,win) x>=win(1) & x<=win(2);
end

%=== Allocate output: R consecutive columns per feature
D = numel(feats);
N = size(data_used,1);
X = nan(N, D*R);

%=== Remove data outside the window from each row that has data
idxExist = cellfun(@(x) ~isempty(x), data_used(:,3));
for n = find(idxExist(:))'
    keep = windowFcn(data_used{n,2}, T);
    data_used{n,2} = data_used{n,2}(keep);
    data_used{n,3} = data_used{n,3}(keep);
    data_used{n,4} = data_used{n,4}(keep);
end

for f=1:D
    %=== Locate this feature's measurements within each row
    idxUsed = cellfun(@(x) strcmp(x,feats{f}), data_used(:,3), 'UniformOutput',false);
    idxUsedExist = cellfun(@any, idxUsed);
    if ~any(idxUsedExist)
        continue; % no measurements in the window: columns stay NaN
    end

    idxData = (f-1)*R; % columns idxData+1 .. idxData+R belong to feature f
    for r=1:R
        v = cellfun(@(x,y) rfcn{r}(x(y)), data_used(idxUsedExist,4), idxUsed(idxUsedExist));
        X(idxUsedExist,idxData+r) = v;
    end

    %=== Remove used features from data_used to speed up function
    data_used(idxUsedExist,2:4) = cellfun(@(x,y) x(~y),...
        data_used(idxUsedExist,2:4), repmat(idxUsed(idxUsedExist),1,3),...
        'UniformOutput',false);
end

%=== Generate header
for r=1:R
    rule{r} = [upper(rule{r}(1)), rule{r}(2:end)]; % capitalize first letter
end

if numel(T)==1
    Tstr = num2str(T(1));
elseif T(1) == 0 && T(2) == 2880
    %=== don't output anything for simplicity
    Tstr = '';
else
    Tstr = [num2str(T(1)) 'to' num2str(T(2))];
end

% Reshape features to match data (feature 1 rule 1, feature 1 rule 2, etc)
feats = repmat(feats',R,1);
feats = feats(:);
rule = repmat(rule,D,1);
header = strcat(feats,rule);
header = strcat(header,Tstr)';

end