function [ x_rk ] = tiedrankrelative(x, idxRankable)
%TIEDRANKRELATIVE Performs tied ranking for all elements, relative to a subset of the elements
%   [ x_rk ] = tiedrankrelative(x, idxRankable) ranks all of the elements
%   of x relative to the elements flagged by idxRankable. Elements that
%   are not flagged ("test" elements) still receive a rank, but they do
%   not influence the rank of any other element. This is useful if you
%   would, for example, like to rank points from an unused test set
%   relative to a training set.
%
%   Resulting ranks (per column):
%     - A rankable element gets its ordinary tied rank among the rankable
%       elements only.
%     - A non-rankable element tied with one or more rankable elements
%       gets the same rank as those rankable elements.
%     - A non-rankable element that is not tied with any rankable element
%       gets k + 0.5, where k is the number of rankable elements that are
%       strictly smaller.
%     - NaN elements get a rank of NaN and never influence other ranks.
%
%   If idxRankable is not supplied (or is empty), all elements are used
%   to calculate the ranking, averaging the ranks of tied elements.
%
%   Tied Inf / -Inf values are detected and averaged like any other tie.
%
%   If x is a matrix, the function operates column-wise.
%
%	Inputs:
%		x           - The input data elements (N-by-P).
%		idxRankable - Elements to be used in the ranking (i.e.,
%		              idxTraining). Either a logical array the same size
%		              as x (each column of x uses its own column of the
%		              mask), or a logical vector with N elements (row or
%		              column) that is applied to every column of x.
%
%	Outputs:
%		x_rk        - Points in x ranked according to the elements flagged
%		              by idxRankable. Same size as x.
%
%	Example
%       load PhysionetDataSetA;
%       xtrain=data(1:500,:);
%       ytrain=outcome(1:500);
%       xtest=data(501:1000,:);
%       ytest=outcome(501:1000);
%
%       idxTraining = [true(size(xtrain));...
%           false(size(xtest))];
%		[ x_rk ] = tiedrankrelative([xtrain;xtest], idxTraining);
%
%	See also TIEDRANK

%	Copyright 2012 Alistair Johnson
%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-15 12:26:18 +0100 (Tue, 15 May 2012) $
%	$Revision: 1 $
%	Originally written on GLNXA64 by Alistair Johnson, 30-Apr-2012 10:30:47
%	Contact: alistairewj@gmail.com

[N,P] = size(x);

%=== Build an N-by-P mask of the elements that must NOT influence ranks
if nargin<2 || isempty(idxRankable)
    idxTest = false(N,P);
elseif isequal(size(idxRankable), [N,P])
    idxTest = ~idxRankable;
elseif isvector(idxRankable) && numel(idxRankable) == N
    % A single mask (row or column) shared by every column of x
    idxTest = repmat(~idxRankable(:), 1, P);
else
    error('tiedrankrelative:sizeMismatch', ...
        'idxRankable must be the same size as x, or a vector with size(x,1) elements.');
end

%=== Sort data (NaNs are placed last by an ascending sort)
[x_sorted, idxRk] = sort(x,1,'ascend');

%=== Linear indices mapping each sorted position back to its original
%    position. idxRk only holds row numbers, so the column offset has to
%    be added explicitly; indexing with idxRk alone would always read
%    from the first column.
idxLinearRk = idxRk + repmat((0:P-1)*N, N, 1);

%=== Create default rankings 1:N before ties
x_rk_sorted = repmat((1:N)',1,P);
x_rk_sorted(isnan(x_sorted)) = NaN;

%=== Get logical indices of ties
% Row r is true when sorted elements r and r+1 are equal. Direct equality
% is used rather than (difference == 0) so that Inf == Inf counts as a
% tie; NaN == NaN is false, so NaNs are never tied.
idxTied = x_sorted(2:end,:) == x_sorted(1:end-1,:);

%=== Sort test indices and create adjustment vectors
idxTestRk = reshape(idxTest(idxLinearRk), N, P);
adj1 = double(idxTestRk); % multiples of -0.5 (test points at this value)
adj2 = double(idxTestRk); % multiples of -1   (test points below this value)

for c=1:P
    %=== Get column specific numerical indices of ties
    % N+2 is a sentinel that can never continue a run of ties, so the
    % inner loop below always terminates without a bounds check.
    idxTiedCol = [find(idxTied(:,c)); N+2];
    N_Tied = numel(idxTiedCol);
    i=1;
    while i < N_Tied
        idxTieStart = idxTiedCol(i);
        nties = 2;
        %=== Count number of ties (consecutive tied pairs form one group)
        while (idxTieStart+nties-1 == idxTiedCol(i+1))
            nties = nties+1;
            i=i+1;
        end

        %=== Take and impute average
        idxTieEnd = idxTiedCol(i)+1;
        idxTieAdjust = idxTieStart:idxTieEnd;
        x_rk_sorted(idxTieAdjust,c) = sum(x_rk_sorted(idxTieAdjust,c)) / nties;

        %=== Correct the group for the test points it contains
        ntesttied = sum(idxTestRk(idxTieAdjust,c));
        if ntesttied>0
            % Each test point in the group inflated the averaged rank by
            % 0.5, for every member of the group (test or not).
            adj1(idxTieAdjust,c) = ntesttied;
            % Elements after the group must drop by one per test point in
            % the group; members of the group itself must not. Park the
            % whole count on the last member so that the exclusive
            % cumulative sum below only affects later elements.
            adj2(idxTieAdjust,c) = 0;
            adj2(idxTieAdjust(end),c) = ntesttied;
        end
        i=i+1;
    end
end

%=== Exclusive cumulative sum: number of test points strictly below
% Equivalent to shifting adj2 down one row before cumsum, but keeps the
% N-by-P shape when N is 0.
adj2 = cumsum(adj2,1) - adj2;

%=== Apply adjustments and restore the original element order
x_rk_sorted = x_rk_sorted - adj1*0.5 - adj2;
x_rk = zeros(N,P);
x_rk(idxLinearRk) = x_rk_sorted;

end