function [ pwr ] = NicForest_CalculateGroupPower(xtrain,ytrain,opt) %NICFOREST_CALCULATEGROUPPOWER Calculate the power of the exponent for the %groupings, if provided % [ width ] = NicForest_CalculateGroupPower(xtrain,ytrain,opt) calculates a % reasonable starting value for the width parameter for the ensemble % forest development. % % Inputs: % xtrain - Training features % ytrain - Training targets % opt - Number of trees to be used in development % % Outputs: % opt.Width - Scalar initial value for the intercept prior's width % % Example % [ width ] = NicForest_CalculateGroupPower(xtrain) % % See also NICFOREST NICFOREST_TRAIN % $LastChangedBy: alistair $ % $LastChangedDate: 2012-05-30 12:21:30 +0100 (Wed, 30 May 2012) $ % $Revision: 21 $ % Originally written on GLNXA64 by Alistair Johnson, 24-May-2012 13:32:13 % Contact: alistairewj@gmail.com if isempty(opt.Group) || numel(unique(opt.Group))==1 %=== No groups input, do not calculate power // set it to 0 pwr=0; end Ntrees = opt.Trees; if strcmp(opt.Family,'binomial') likelihoodFcn = @ll; else likelihoodFcn = @normll; end %=== Do a quick MCMC to find a reasonable width opt = forest_opt_set(opt,... 'Iterations',20000,... 'Save',2000,... 'Resets', 1,... 'UpdatedTrees', 2,... 'BurnIn', 20); %=== Split into 2 folds + train 2 models group_num = unique(opt.Group); group_num = group_num(randperm(numel(group_num),numel(group_num))); idxSplit = false(size(ytrain,1),1); for k = 1: floor(numel(group_num)/2) idxSplit(opt.Group == group_num(k)) = true; end opt1 = opt; opt2 = opt; opt1.Group = opt.Group(idxSplit); opt2.Group = opt.Group(~idxSplit); %=== Loop across different powers pwr = 0:0.25:1; Npwr = numel(pwr); ll1 = zeros(Npwr,1); ll2 = zeros(Npwr,1); ypred = cell(2,Npwr); for k=1:Npwr fprintf('Beginning power value %g - repetition %g of %g\n',pwr(k),k,Npwr); %=== Select a new value for power opt1.GroupPower = pwr(k); opt2.GroupPower = pwr(k); %=== Train models [ forests1 ] = NicForest_train(xtrain(idxSplit,:),ytrain(idxSplit,:),opt1); [ forests2 ] = NicForest_train(xtrain(~idxSplit,:),ytrain(~idxSplit,:),opt2); %=== Evaluate models [ ypred1 ] = NicForest_apply_quick( forests1 , xtrain(~idxSplit,:) ); [ ypred2 ] = NicForest_apply_quick( forests2 , xtrain(idxSplit,:) ); %=== Save likelihoods ll1(k) = feval(likelihoodFcn,ypred1,ytrain(~idxSplit),opt2.Group); ll2(k) = feval(likelihoodFcn,ypred2,ytrain(idxSplit),opt1.Group); ypred{1,k} = ypred1; ypred{2,k} = ypred2; end ll1 ll2 [minLL,idxMin] = min(ll1+ll2); pwr = pwr(idxMin); end function [p] = invlogit(p) % inverse logit p = 1./(1+exp(-p)); end function [p] = ll(pred,tar,group) % log likelihood pred = invlogit(pred); p = sum((-tar.*log(pred)-(1-tar).*log(1-pred)).*group); end function [p] = normll(pred,tar,group) % log likelihood sigma=std(tar); p = log(sigma)*sum(group) + 0.5*sum(((tar-pred).^2).*group)/(sigma^2); end