function [ ypred ] = NicForest_apply( f_out, xtest )
%NICFOREST_APPLY Apply a trained forest model to new observations
%   [ ypred ] = NicForest_apply( f_out, xtest ) applies the model stored in
%   f_out to the rows of xtest and returns one prediction per row.
%
%   Inputs:
%       f_out - structure describing the trained model. Fields read here:
%                 forests  - 3-D array. forests(i,:,j) is handed to
%                            apply_tree as tree j of forest i, and the
%                            last page, forests(i,1,end), is added as the
%                            intercept of forest i.
%                 xtrain   - training data; xtest is ranked relative to it
%                            (must be vertically concatenable with xtest).
%                 TrainNaN - number of non-NaNs in the training data set;
%                            forwarded unchanged to apply_tree.
%                 Family   - 'binomial' selects the classification path,
%                            any other value selects the regression path.
%                 ynum     - regression only: factor applied to the
%                            normcdf-transformed predictions.
%                 yrk      - regression only: column of training target
%                            ranks.
%                 ytrain   - regression only: training target values,
%                            indexed consistently with yrk.
%       xtest - data to predict, one observation per row.
%
%   Outputs:
%       ypred - column vector with one prediction per row of xtest,
%               averaged over all forests.
%
%   Requires tiedrankrelative and apply_tree, which are not defined in this
%   file.

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on MACI64 by Louis Mayaud, 25-April-2012 14:05:26
%	Contact: alistairewj@gmail.com

forests = f_out.forests;
xtrain  = f_out.xtrain;
NaNNbre = f_out.TrainNaN; % number of non-nans in training data set
N = size(xtest,1);

%=== RANK TEST DATA RELATIVE TO TRAINING DATA ===%
idxTraining = [false(N,1);true(size(xtrain,1),1)]; % elements used for ranking
xtest_rk = tiedrankrelative([xtest;xtrain],idxTraining);
xtest_rk = xtest_rk(1:N,:); % remove training set

% The ranks are passed to apply_tree as they are, together with NaNNbre.
% An earlier revision also built a norminv-scaled copy of the ranks here,
% but nothing ever read it, so that dead computation has been removed.

[NForests , ~ , Ntrees] = size(forests);
Ntrees = Ntrees - 1; % the last page holds the intercept, not a tree

% one column of raw scores per forest
ypred = zeros(N,NForests);

for i=1:NForests
    % start from the intercept of this forest
    ypred(:,i) = ypred(:,i) + forests(i,1,Ntrees+1);

    % accumulate the contribution of every tree
    val = zeros(N,1);
    for j=1:Ntrees
        val = val + apply_tree( forests(i,:,j) , xtest_rk , NaNNbre );
    end
    ypred(:,i) = ypred(:,i) + val;
end

%=== method: regression or classification
if strcmp(f_out.Family,'binomial')
    %=== map scores to probabilities, then average across forests
    ypred = invlogit(ypred);
    ypred = sum(ypred,2)/NForests;
else
    %=== average across forests, then un-normalize using normcdf
    ypred = sum(ypred,2)/NForests;
    %=== re-scale to original values
    ypred = normcdf(ypred,0,1)*f_out.ynum;

    %=== use inverse ranking mapping to find actual predicted values
    %=== first merge the training ranks and predicted ranks and sort them
    yrk = [f_out.yrk;ypred];
    idxPred = [false(size(f_out.yrk));true(size(ypred))];
    [yrk,idxSort] = sort(yrk,1,'ascend');
    idxPred = idxPred(idxSort); % idxPred now follows the sorted order

    %=== place the known training targets at their sorted positions;
    %=== prediction slots stay 0 and are filled in by propogateValues
    % NOTE(review): propogateValues treats every 0 as a gap, so a training
    % target that is exactly 0 is not used as an interpolation anchor.
    ytrain = zeros(numel(yrk),1);
    ytrain(~idxPred) = f_out.ytrain(idxSort(~idxPred));

    %=== fill the gaps by rank-weighted interpolation between neighbours
    ypred_sorted = propogateValues(ytrain,yrk);

    %=== invert the sort permutation and keep only the prediction slots
    idxUnsort = 1:numel(idxSort);
    idxUnsort(idxSort) = idxUnsort;
    ypred_sorted = ypred_sorted(idxUnsort);
    ypred = ypred_sorted(idxPred(idxUnsort));
end

end

function [x] = invlogit(x)
%INVLOGIT Element-wise inverse logit (logistic) transform
x = 1./(1+exp(-x));
end

function [ x ] = propogateValues(x,y)
%PROPOGATEVALUES Fill zeros in a vector by rank-weighted interpolation
%   [ x ] = propogateValues(x,y) replaces every zero in x with a weighted
%   average of the nearest non-zero value before it and the nearest
%   non-zero value after it. The weights come from y: with (x1,y1) the
%   previous known value/rank, (x2,y2) the next known value/rank and y0
%   the rank at the zero,
%       x0 = ((y0-y1)*x2 + (y2-y0)*x1) / (y2-y1)
%
%   Inputs:
%       x - A COLUMN vector containing 0s and non-zeroes. Zeros mark the
%           entries to fill in.
%       y - A column vector the same size as x containing the ranks of x,
%           with no 0s.
%
%   Outputs:
%       x - The input with its zeros replaced. If x contains no zeros it
%           is returned unchanged.
%
%   Notes:
%       Zeros located before the first or after the last non-zero entry
%       have no neighbour on one side. That side contributes a value of 0
%       and a rank of 0 to the formula above; no dedicated boundary
%       handling is implemented.
%
%   Example
%       x = [2;0;6];
%       y = [1;2;3];
%       [ x ] = propogateValues(x,y)   % returns [2;4;6]
%
%   See also FIND DIFF CUMSUM

%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on GLNXA64 by Alistair Johnson, 08-May-2012 11:27:49
%	Contact: alistairewj@gmail.com

idx0 = x==0;

% Nothing to fill in. The previous code indexed into the (empty) result of
% find(x==0,...) at this point and raised an out-of-bounds error.
if ~any(idx0)
    return;
end

x1 = x;
x2 = flipud(x);
valind1 = find(x1);
valind2 = find(x2);

%=== x1 = propogate values forward
% storing the successive differences at the non-zero positions makes the
% running sum reproduce the last seen non-zero value at every position
x1(valind1(2:end)) = diff(x1(valind1));
x1 = cumsum(x1);

%=== x2 = propogate values backward (same trick on the reversed vector)
x2(valind2(2:end)) = diff(x2(valind2));
x2 = cumsum(x2);
x2 = flipud(x2);

%=== y1/y2 are the same but using the ranks
y1 = y;
y1(idx0) = 0;
y2 = y;
y2(idx0) = 0;
y2 = flipud(y2);

y1(valind1(2:end)) = diff(y1(valind1));
y1 = cumsum(y1);

y2(valind2(2:end)) = diff(y2(valind2));
y2 = cumsum(y2);
y2 = flipud(y2);

%=== calculate weights using y values which are ranks
y1 = y1(idx0);
y2 = y2(idx0);
x1 = x1(idx0);
x2 = x2(idx0);
y0 = y(idx0);

x(idx0) = ((y0-y1).*x2 + (y2-y0).*x1)./(y2-y1);

end