function [ d1,d2 ] = pniExtractFeaturesC(data)
%PNIEXTRACTFEATURESC Per-subject mean features for day 1 and day 2
%   [ d1,d2 ] = pniExtractFeaturesC(data) builds, for each row
%   (observation/subject) in data, the mean value of a fixed set of
%   features over the first day (d1) and over the second day (d2).
%
%   Inputs:
%       data    - Cell array of data.
%           Column 1 - Subject IDs
%           Column 2 - Time stamp vectors for each subject
%           Column 3 - Feature name vectors for each subject
%           Column 4 - Data value vectors for each subject
%
%   Outputs:
%       d1      - N-by-7 matrix of day-1 means (time stamp <= 1440 min).
%                 Columns, in order:
%                   HR, Mg, Na, Weight, WBC, H2Odeficit, HCT_sqrt
%       d2      - N-by-13 matrix of day-2 means (time stamp >= 1441 min).
%                 Columns, in order:
%                   GCS, dUrineDt, Creatinine, Glucose, WBC, BUN_logabs,
%                   dUrineDt, Temp_abs, BUNCreatR_sqrt, Platelets_sqrt,
%                   OOR_dUrineDt, Na_abs, H2Odeficit
%                 (dUrineDt appears twice; the duplicate column is kept so
%                 that the column layout seen by callers does not change.)
%       An entry is NaN when the subject has no samples of that feature in
%       the corresponding day.
%
%   Feature transformations:
%       HCT_sqrt        sqrt(HCT)
%       BUN_logabs      log(abs(BUN))
%       Temp_abs        abs(Temp)
%       Platelets_sqrt  sqrt(Platelets)
%       Na_abs          abs(Na)
%       H2Odeficit      0.6 * Weight * (Na/140 - 1), using the first
%                       recorded Weight value, at each Na time stamp
%       dUrineDt        cumulative trapezoidal integral of Urine divided
%                       by the time stamp
%       BUNCreatR_sqrt  sqrt(BUN/Creatinine), sampled every 4 hours
%       OOR_dUrineDt    number of dUrineDt samples above 30 inside a
%                       2-hour window, window start moved in 15 min steps
%
%   Example
%       %=== Load data in
%       load('data_processed_cell.mat');
%
%       %=== Extract features
%       [ d1,d2 ] = pniExtractFeaturesC(data);
%
%   See also PNMAIN PNPREPROCESSDATA

%   References:
%       Physionet Challenge 2012
%       http://physionet.org/challenge/2012/
%
%   Copyright 2012 Alistair Johnson

%   $LastChangedBy: alistair $
%   $LastChangedDate: 2012-04-25 01:26:50 +0100 (Wed, 25 Apr 2012) $
%   $Revision: 344 $
%   Originally written on GLNXA64 by Alistair Johnson, 15-Apr-2012 14:40:13
%   Contact: alistairewj@gmail.com

DAY_MINUTES     = 24*60;    % time stamps are handled in minutes
OOR_WINDOW      = 120;      % width of the out-of-range window (2 hours)
OOR_STEP        = 15;       % step between successive window starts
OOR_LOWER       = NaN;      % NaN => comparison is always false (no lower limit)
OOR_UPPER       = 30;       % dUrineDt above this value is out of range

records = data(:,1);
N = numel(records); % number of observations

[dataDesc,dataFixed] = pnDataDescriptions; % Output data descriptions
varNames = [dataFixed(:,1);dataDesc(:,1)]';

%=== Re-organise the data: Data{r,2} becomes a struct with one field per
%    variable, each field holding a [time, value] matrix.
%    NOTE(review): assumes pnExtractField returns a cell array whose column
%    2 holds time stamps and column 4 holds values - confirm against it.
Data = cell(N,2);
Data(:,1) = records;
for v = 1:length(varNames)
    fieldName = varNames{v};
    s = pnExtractField(data,fieldName);
    hasSamples = ~cellfun(@isempty, s(:,2));
    Data(hasSamples,2) = cellfun(@(x,y,z) setfield(x,fieldName,[y,z]),...
        Data(hasSamples,2), s(hasSamples,2), s(hasSamples,4),...
        'UniformOutput',false);
end

%=== Features copied as-is, split into day 1 and day 2
d1h = {'HR','Mg','Na','Weight','WBC'};
d2h = {'GCS','dUrineDt','Creatinine','Glucose','WBC'};

%=== Full (ordered) feature lists; these define the output columns
d1h_full = [d1h,'H2Odeficit','HCT_sqrt'];
d2h_full = [d2h,'BUN_logabs','dUrineDt','Temp_abs','BUNCreatR_sqrt',...
    'Platelets_sqrt','OOR_dUrineDt','Na_abs','H2Odeficit'];

% Each output is sized from its own feature list so that missing features
% stay NaN (growing a too-narrow matrix by assignment would pad with 0).
d1 = nan(N,numel(d1h_full));
d2 = nan(N,numel(d2h_full));

oorStarts = (1:OOR_STEP:(2*DAY_MINUTES))'; % Scroll over 2 days

for r = 1:N
    rec = Data{r,2};    % [] when the subject has no data at all
    f1  = struct();     % day-1 feature series, [time, value] per field
    f2  = struct();     % day-2 feature series, [time, value] per field

    %=== Easy to extract features
    for k = 1:numel(d1h)
        if isfield(rec,d1h{k})
            f1.(d1h{k}) = rec.(d1h{k});
        end
    end
    for k = 1:numel(d2h)
        if isfield(rec,d2h{k})
            f2.(d2h{k}) = rec.(d2h{k});
        end
    end

    %=== Features which require just a bit of transformation
    % Day 1
    if isfield(rec,'HCT')
        f1.HCT_sqrt = [rec.HCT(:,1),sqrt(rec.HCT(:,2))];
    end

    % Day 2
    if isfield(rec,'BUN')
        f2.BUN_logabs = [rec.BUN(:,1),log(abs(rec.BUN(:,2)))];
    end
    if isfield(rec,'Temp')
        f2.Temp_abs = [rec.Temp(:,1),abs(rec.Temp(:,2))];
    end
    if isfield(rec,'Platelets')
        f2.Platelets_sqrt = [rec.Platelets(:,1),sqrt(rec.Platelets(:,2))];
    end
    if isfield(rec,'Na')
        f2.Na_abs = [rec.Na(:,1),abs(rec.Na(:,2))];
    end

    %=== New physiological features
    % H2Odeficit: evaluated at every Na time stamp, first Weight value
    if isfield(rec,'Weight') && isfield(rec,'Na')
        h2oDeficit = [rec.Na(:,1), ...
            0.6 * rec.Weight(1,2) * ((rec.Na(:,2) / 140) - 1)];
        f1.H2Odeficit = h2oDeficit;
        f2.H2Odeficit = h2oDeficit;
    end

    % dUrineDt: needs more than two urine samples. The whole matrix is
    % assigned at once so a previously copied field of a different length
    % is replaced instead of triggering a size mismatch.
    if isfield(rec,'Urine') && size(rec.Urine,1) > 2
        tUrine = rec.Urine(:,1);
        f2.dUrineDt = [tUrine, cumtrapz(tUrine,rec.Urine(:,2))./tUrine];
    end

    % BUNCreatR (sqrt): stored under the name listed in d2h_full so that it
    % is trimmed to day 2 and averaged like every other feature.
    bunCreatR = extractBUNCreatRate(rec);
    if ~isempty(bunCreatR)
        f2.BUNCreatR_sqrt = bunCreatR;
    end

    % OOR_dUrineDt: per window, COUNT the samples that are out of range.
    % One row is produced for every window start, including empty windows.
    if isfield(f2,'dUrineDt')
        tRate = f2.dUrineDt(:,1);
        vRate = f2.dUrineDt(:,2);
        oorCount = zeros(numel(oorStarts),1);
        for w = 1:numel(oorStarts)
            t0 = oorStarts(w);
            inWindow = tRate > t0 & tRate < t0 + OOR_WINDOW;
            winVals = vRate(inWindow);
            oorCount(w) = nnz(winVals < OOR_LOWER | winVals > OOR_UPPER);
        end
        f2.OOR_dUrineDt = [oorStarts, oorCount];
    end

    %=== Extract mean day1 and day2 values for each feature
    for k = 1:numel(d1h_full)
        d1(r,k) = meanWithinDay(f1,d1h_full{k},true,DAY_MINUTES);
    end
    for k = 1:numel(d2h_full)
        d2(r,k) = meanWithinDay(f2,d2h_full{k},false,DAY_MINUTES);
    end
end

end

function [m] = meanWithinDay(feats,name,isDay1,dayMinutes)
%MEANWITHINDAY Mean of a feature's values restricted to day 1 or day 2
%   feats       - struct of [time, value] matrices
%   name        - field to average
%   isDay1      - true: drop rows with time > dayMinutes
%                 false: drop rows with time < dayMinutes + 1
%   Returns NaN when the field is absent or no row remains.
m = NaN;
if ~isfield(feats,name)
    return;
end
series = feats.(name);
if isempty(series)
    return;
end
if isDay1
    idxRem = series(:,1) > dayMinutes;
else
    idxRem = series(:,1) < dayMinutes + 1;
end
series(idxRem,:) = [];
if ~isempty(series)
    m = mean(series(:,2));
end
end

function [BUNCreatR] = extractBUNCreatRate(record)
%EXTRACTBUNCREATRATE Square root of the BUN/Creatinine ratio every 4 hours
%   record      - struct of [time, value] matrices (or [] for no data)
%   BUNCreatR   - K-by-2 matrix [time, sqrt(BUN/Creatinine)], one row per
%                 4-hour grid point (over 2 days) at which both values are
%                 available; [] when there is none.
%   NOTE(review): pinterp is defined elsewhere; it is presumed to return
%   the value of the series at time t (NaN when unavailable) using the
%   third argument as a look-back limit in minutes - confirm.
BUNCreatR = [];
hasBUN   = isfield(record,'BUN');
hasCreat = isfield(record,'Creatinine');
for t = 1:(60*4):(24*60*2) % Scroll over 2 days, every 4 hours
    if hasBUN
        BUNc = pinterp(record.BUN,t,28*60);
    else
        BUNc = NaN;
    end
    if hasCreat
        Creatc = pinterp(record.Creatinine,t,28*60);
    else
        Creatc = NaN;
    end

    if sum(isnan([BUNc Creatc]))==0
        BUNCreatR(end+1,:) = [t get_BUNCreatR(BUNc,Creatc)]; %#ok<AGROW>
    end
end
if ~isempty(BUNCreatR)
    BUNCreatR(:,2) = sqrt(BUNCreatR(:,2));
end
end

function BUNCreatR = get_BUNCreatR(BUN,Creat)
%GET_BUNCREATR Ratio of BUN to creatinine
BUNCreatR = BUN/Creat;
end