function [ ypred ] = NicForest_apply_quick( f_out, xtest )
% [ ypred ] = NicForest_apply_quick( f_out, xtest ) applies the forests
%   stored in f_out to the observations in xtest and returns one
%   prediction per row of xtest.
%
%   Inputs:
%       f_out - model structure. Fields read by this function:
%                   forests, xtrain, xtrain_rk_normalized_sort, TrainNaN,
%                   Family, and (non-binomial case only) ynum, yrk, ytrain.
%               If the field nan_placement is absent it is obtained by
%               calling BRF_CompactForest(f_out).
%       xtest - observations to predict, one per row. It is vertically
%               concatenated with f_out.xtrain, so it must have the same
%               number of columns.
%
%   Outputs:
%       ypred - column vector of predictions averaged over all forests.
%               If f_out.Family is 'binomial' the per-forest outputs are
%               passed through the inverse logit before averaging.
%               Otherwise the averaged output is mapped through normcdf,
%               scaled by f_out.ynum, and converted back to target values
%               by interpolating f_out.ytrain over f_out.yrk.

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on MACI64 by Louis Mayaud, 25-April-2012 14:05:26
%	Contact: alistairewj@gmail.com

forests = f_out.forests;
xtrain = f_out.xtrain;
xtrain_rk_normalized_sorted = f_out.xtrain_rk_normalized_sort;
NaNNbre = f_out.TrainNaN; % number of non-nans in training data set

Ntest = size(xtest,1);

%=== Rank the test data relative to the training data
idxTraining = [false(Ntest,1); true(size(xtrain,1),1)]; % elements used for ranking
xtest_rk = tiedrankrelative([xtest;xtrain],idxTraining);
xtest_rk = xtest_rk(1:Ntest,:); % remove training set

%=== NORMALIZE DATA ===%
%=== Scale ranks between 0->1
xtest_rk_normalized = bsxfun(@rdivide,xtest_rk,NaNNbre);
xtest_rk_normalized = norminv(xtest_rk_normalized,0,1);

[NForests , ~ , Ntrees] = size(forests);
Ntrees = Ntrees - 1; % remove the intercept

%=== Speed up function by pre-calculating certain values
if isfield(f_out,'nan_placement')
    nan_placement = f_out.nan_placement;
else
    f_out = BRF_CompactForest(f_out);
    nan_placement = f_out.nan_placement;
end

% create prediction vector for each forest
ypred = zeros(Ntest,NForests);

% For each forest: intercept (stored after the last tree) + sum of the trees
for i=1:NForests
    forest = reshape(forests(i,:,:),[size(forests,2),size(forests,3)]);
    pred = apply_tree_final( forest, NaNNbre, nan_placement(:,:,i), ...
        xtest_rk, xtest_rk_normalized, xtrain_rk_normalized_sorted );
    ypred(:,i) = forests(i,1,Ntrees+1) + sum(pred,2);
end

%=== method: regression or classification
if strcmp(f_out.Family,'binomial')
    %=== invlogit if binomial case
    ypred = invlogit(ypred);

    %=== sum across forests and normalize by number of forests
    ypred = sum(ypred,2)/NForests;
else
    %=== sum across forests and normalize by number of forests
    ypred = sum(ypred,2)/NForests;

    %=== un-normalize predictions using normcdf and re-scale by ynum
    ypred = normcdf(ypred,0,1)*f_out.ynum;

    %=== use inverse ranking mapping to find actual predicted values
    %=== first get the ranks of the predictions/training values
    yrk = [f_out.yrk;ypred];
    idxPred = [false(size(f_out.yrk));true(size(ypred))];
    [yrk,idxSort] = sort(yrk,1,'ascend');
    idxPred = idxPred(idxSort);

    %=== now order the actual y values in the training set similarly
    ytrain = zeros(numel(yrk),1);
    ytrain(~idxPred) = f_out.ytrain(idxSort(~idxPred));

    %=== now calculate predictions using weighted average
    % The explicit mask keeps training targets that are exactly 0 as
    % interpolation anchors instead of treating them as slots to fill.
    ypred_sorted = propogateValues(ytrain,yrk,idxPred);

    %=== undo the sort and keep only the prediction entries
    idxUnsort = 1:numel(idxSort);
    idxUnsort(idxSort) = idxUnsort;
    ypred_sorted = ypred_sorted(idxUnsort);
    ypred = ypred_sorted(idxPred(idxUnsort));
end

end

function [x] = invlogit(x)
%INVLOGIT Element-wise inverse logit, 1/(1+exp(-x)).
x = 1./(1+exp(-x));
end

function [ x ] = propogateValues(x,y,idx0)
%PROPOGATEVALUES Fill placeholder entries by rank-weighted interpolation
%   [ x ] = propogateValues(x,y) replaces every zero in x with a weighted
%   average of the nearest known value before it and the nearest known
%   value after it. The weights are the distances, measured in y, to
%   those two neighbours.
%
%   [ x ] = propogateValues(x,y,idx0) uses the logical mask idx0 to mark
%   the entries to fill, so that genuine zero values in x are kept as
%   known values.
%
%   Inputs:
%       x    - Column vector of known values and placeholders.
%       y    - Column vector, same size as x, containing the ranks of x,
%              with no 0s.
%       idx0 - (optional) logical column vector, same size as x, true
%              where x must be filled. Default: x==0.
%
%   Outputs:
%       x    - The input with the placeholder entries filled in.
%
%   Placeholders located before the first known value (or after the last
%   one) have no neighbour on that side; the missing neighbour is taken
%   as value 0 at rank 0.
%
%   The vectors must be columns because flipud is used to reverse them.
%
%   Example
%       x = [0;2;0;6];
%       y = [1;2;3;4];
%       [ x ] = propogateValues(x,y)   % returns [1;2;4;6]
%
%   See also FIND DIFF CUMSUM

%	Copyright 2012 Alistair Johnson

%	$LastChangedBy: alistair $
%	$LastChangedDate: 2012-05-16 13:48:37 +0100 (Wed, 16 May 2012) $
%	$Revision: 10 $
%	Originally written on GLNXA64 by Alistair Johnson, 08-May-2012 11:27:49
%	Contact: alistairewj@gmail.com

if nargin < 3 || isempty(idx0)
    idx0 = x==0;
end

%=== nothing to fill
if ~any(idx0)
    return;
end

% placeholders must not contribute to the cumulative sums below
x(idx0) = 0;

valind1 = find(~idx0);          % known entries, forward order
valind2 = find(~flipud(idx0));  % known entries, reversed order

%=== x1 = propogate values forward
% storing successive differences at the known positions makes cumsum
% hold each known value until the next known position is reached
x1 = x;
x1(valind1(2:end)) = diff(x1(valind1));
x1 = cumsum(x1);

%=== x2 = propogate values backward
x2 = flipud(x);
x2(valind2(2:end)) = diff(x2(valind2));
x2 = cumsum(x2);
x2 = flipud(x2);

%=== y1/y2 are the same but using the ranks
y1 = y;
y1(idx0) = 0;
y2 = flipud(y1);

y1(valind1(2:end)) = diff(y1(valind1));
y1 = cumsum(y1);

y2(valind2(2:end)) = diff(y2(valind2));
y2 = cumsum(y2);
y2 = flipud(y2);

%=== calculate weights using y values which are ranks
y1 = y1(idx0);
y2 = y2(idx0);
x1 = x1(idx0);
x2 = x2(idx0);
y0 = y(idx0);

x(idx0) = ((y0-y1).*x2 + (y2-y0).*x1)./(y2-y1);

end