%TESTC Test classifier, error / performance estimation
%
%   [E,C] = TESTC(A*W,TYPE)
%   [E,C] = TESTC(A,W,TYPE)
%   E = A*W*TESTC([],TYPE)
%
%   [E,F] = TESTC(A*W,TYPE,LABEL)
%   [E,F] = TESTC(A,W,TYPE,LABEL)
%   E = A*W*TESTC([],TYPE,LABEL)
%
% INPUT
%   A      Dataset
%   W      Trained classifier mapping
%   TYPE   Type of performance estimate, default: probability of error
%   LABEL  Target class, default: none
%
% OUTPUT
%   E      Error / performance estimate
%   C      Number of erroneously classified objects per class,
%          sorted according to A.LABLIST
%   F      Error / performance estimate of the non-target classes
%
% DESCRIPTION
% This routine supplies several performance estimates for a trained
% classifier W based on a test dataset A. It is possible to supply a cell
% array of datasets {A*W}, a cell array of datasets {A}, or a cell array
% of classifiers {W}. In case A as well as W is a cell array, W may be
% 2-dimensional with as many columns as A has datasets. See DISPERROR
% for an example.
%
% A should contain test objects for every class assigned by W.
% Objects in A belonging to classes other than those defined for W, as
% well as unlabeled objects, are neglected. Note that this implies that
% TESTC applied to a rejecting classifier (e.g. REJECTC) estimates the
% performance on the non-rejected objects only. By
%
%   [E,C] = TESTC(A,W); E = (C./CLASSSIZES(A))*GETPRIOR(A)';
%
% the classification error with respect to all objects in A may be
% computed. Use CONFMAT for an overview of the total class assignment,
% including the unlabeled (rejected) objects.
%
% In case of missing classes in A, [E,C] = TESTC(A*W) returns a NaN in E,
% but C still holds the number of erroneously classified objects per class.
%
% If LABEL is given, the performance estimate relates just to that class
% as target class. If LABEL is not given, a class average is returned,
% weighted by the class priors.
%
% The following performance measures are supported for TYPE:
%   'crisp'       Expected classification error based on error counting,
%                 weighted by the class priors (default).
%   'FN'          E: false negatives. F: false positives.
%   'TP'          E: true positives. F: true negatives.
%   'soft'        Expected classification error based on soft error
%                 summation, i.e. a sum of the absolute differences
%                 between classifier output and target, weighted by class
%                 priors.
%   'F'           Lissack and Fu error estimate.
%   'mse'         Expected mean square difference between classifier
%                 output and target (based on soft labels), weighted by
%                 class priors.
%   'auc'         Area under the ROC curve (this is an error and not a
%                 performance!). For multi-class problems this is the
%                 weighted average (by class priors) of the
%                 one-against-rest contributions of the classes.
%   'precision'   E: fraction of true target objects among the objects
%                 classified as target. The target class is defined by
%                 LABEL. Priors are not used.
%                 F: recall, the fraction of correctly classified objects
%                 in the target class. Priors are not used.
%   'sensitivity' E: fraction of correctly classified objects in the
%                 target class (defined by LABEL). Priors are not used.
%                 Sensitivity as used here is identical to recall.
%                 F: specificity, the fraction of non-target objects that
%                 are not classified into the target class (defined by
%                 LABEL). Priors are not used.
%
% EXAMPLES
% See PREX_PLOTC.
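%
% A minimal usage sketch (an illustration, not taken from PREX_PLOTC;
% GENDATB, GENDAT and LDC are standard PRTools routines):
%
%   A = GENDATB([50 50]);    % two-class banana set
%   [T,S] = GENDAT(A,0.5);   % split into train and test set
%   W = LDC(T);              % train a linear classifier
%   E = TESTC(S,W)           % crisp error estimate
%   E = TESTC(S*W,'auc')     % area-under-the-ROC error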
%
% SEE ALSO (PRTools Guide)
% MAPPINGS, DATASETS, CONFMAT, REJECTC

% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% $Id: testc.m,v 1.19 2010/02/18 15:57:07 duin Exp $

function [OUT1,OUT2] = testc(a,w,type,label)

  if nargin < 4, label = []; end
  if nargin < 3, type = []; end
  if nargin < 2, w = []; end
  if nargin < 1, a = []; end

  if ischar(w)   % takes care of testc(a*w,type,label)
    label = type;
    type = w;
    w = [];
  end

  % if isempty(type)
  %   type = 'crisp';
  % end

  if (isempty(a))

    % prepares a*testc([],w,type,label), or a*testc([],type,label)
    out1 = prmapping(mfilename,'fixed_cell',{w,type,label});
    out1 = setname(out1,'testc');
    out1 = setbatch(out1,0);   % don't run in batch mode
    out2 = [];

  elseif (~ismapping(w) & ~iscell(w)) | ...
         (ismapping(w) & isfixed(w) & strcmp(getname(w),'testc'))

    % call like testc(a*w,type,label), or a*testc([],w,type,label) which
    % results in testc(a*w,V) in which V = testc([],type,label)

    if ismapping(w)
      % retrieve parameters stored in testc([],w,type,label)
      label = getdata(w,3);
      type  = getdata(w,2);
      w     = getdata(w,1);
    end

    if (iscell(a))

      % If this argument is a cell array, recursively call this
      % function to get errors for all elements in the cell array.
      out1 = zeros(size(a));
      out2 = cell(size(a));
      % if isempty(w)
      for j1 = 1:size(a,1)
        for j2 = 1:size(a,2)
          [out1(j1,j2),out2{j1,j2}] = feval(mfilename,a{j1,j2},w,type,label);
        end
      end

    elseif (isdatafile(a))

      % datafiles need some handling as we have to
      % process all objects separately
      c = getsize(a,3);
      out2 = zeros(1,c);
      next = 1;
      a = setprior(a,getprior(a));
      while next > 0
        [b,next] = readdatafile(a,next);
        if isempty(w)
          [out1,class_err] = feval(mfilename,prdataset(b));
        else
          [out1,class_err] = feval(mfilename,b,w,type,label);
        end
        out2 = out2 + class_err;
      end
      if isempty(a.prior)
        out1 = sum(out2)/size(a,1);
      else
        p = getprior(a);
        csizes = classsizes(a);
        if any(csizes == 0)
          out1 = NaN;
          prwarning(1,'Some classes have no test objects')
        else
          out1 = (out2./classsizes(a))*p';
        end
      end

    else % here we are for the real work

      isdataset(a);   % we need a real dataset for evaluation
      if (ismapping(w) & istrained(w))
        a = a*w;
      end
      fflab = renumlab(getfeatlab(a));
      if any(fflab == 0) % reject option! Allowed?
        if isempty(strmatch(type,char('crisp','FN','TP','precision','sensitivity')))
          error('Reject option incompatible with desired error measure')
        else % remove all objects to be rejected
          reject_col = find(fflab == 0);
          [ma,J] = max(+a,[],2);
          L = find(J~=reject_col);
          a = a(L,:);
        end
      end
      a = a*maxc;   % takes care that every class appears as a single column in a
      %a = remclass(a);
      lablist = getlablist(a);   % classes of interest
      featlab = getfeatlab(a);
      flab = renumlab(featlab,lablist);
      csizes = classsizes(a);
      if any(flab == 0)
        prwarning(1,'Some classes assigned by the classifier have no test objects')
      end
      if any(csizes == 0)
        if nargout < 2 | ~isempty(label)
          % no error / performance measure can be returned
          error('Some classes have no test objects')
        else
          % we can, however, return the error per class
          prwarning(2,'Some classes have no test objects')
          c = getsize(a,3);
          I = matchlablist(a*labeld,lablist);
          nlab = getnlab(a);
          OUT2 = zeros(1,c);
          for j=1:c
            J = find(nlab==j);
            OUT2(j) = sum(I(J)~=j);
          end
          OUT1 = NaN;
        end
        return
      end
      clab = renumlab(lablist,featlab);
      if any(clab == 0)
        prwarning(1,'Superfluous test objects found, they will be neglected')
        J = find(clab~=0);
        a = seldat(a,J);
        a = setlablist(a);
        csizes = csizes(J);
      end
      [m,k,c] = getsize(a);
      p = getprior(a);
      labtype = getlabtype(a);
      if isempty(type)   % set default error measure types
        if islabtype(a,'crisp')
          type = 'crisp';
        elseif islabtype(a,'soft')
          type = 'soft';
        else
          type = 'mse';
        end
      end
      confm = cmat(a);   % compute confusion matrix
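      % The branch below reduces the C x C confusion matrix CONFM to a
      % 2 x 2 one-against-rest matrix CONFM2 = [TP FN; FP TN] per class.
      % A worked sketch (illustrative numbers, not from the source): for
      %
      %            assigned:  1  2  3
      %     true 1          [ 8  1  1 ]
      %     true 2          [ 2  7  1 ]
      %     true 3          [ 0  2  8 ]
      %
      % class 1 against the rest gives CONFM2 = [8 2; 2 18], from which
      % COMP_CRIT derives e.g. a precision and a recall of 8/(8+2) = 0.8.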
      if isempty(label)
        lablist = getlablist(a);
        out2 = (csizes - diag(confm)');
        out = zeros(1,c);
        out1 = 0;
        for j = 1:c   % compute crit one-against-rest
          % confm2 = [confm(j,j) sum(confm(j,:))-confm(j,j); sum(confm(:,j))-confm(j,j) ...
          %           sum(confm(:))-sum(confm(:,j))-sum(confm(j,:)) + confm(j,j)];
          confm2 = [confm(j,j) csizes(j)-confm(j,j); sum(confm(:,j))-confm(j,j) ...
                    sum(confm(:))-sum(confm(:,j))-csizes(j) + confm(j,j)];
          b = seldat(a,j);
          out(j) = comp_crit(type,confm2,a,j,lablist(j,:));
          if isempty(a.prior)
            out1 = out1 + out(j);
          else
            out1 = out1 + p(j) * out(j) / size(b,1);
          end
        end
        if isempty(a.prior)
          out1 = out1 / m;
        end
      else
        n = getclassi(a,label);
        confm2 = [confm(n,n) sum(confm(n,:))-confm(n,n); sum(confm(:,n))-confm(n,n) ...
                  sum(confm(:))-sum(confm(:,n))-sum(confm(n,:)) + confm(n,n)];
        [out1,out2] = comp_crit(type,confm2,a,n,label);
        out1 = out1/csizes(n);
        out2 = out2/(m-csizes(n));
      end
    end

  elseif (iscell(a)) || (iscell(w))

    % If there are two input arguments and either of them is a cell array,
    % call this function on each of the cells.
    % Non-cell array inputs are turned into 1 x 1 cell arrays.
    if (~iscell(a)), a = {a}; end
    if (~iscell(w)), w = {w}; end
    if size(a,2) > 1 & size(w,1) == 1
      % repeat w for all input datasets
      w = repmat(w,size(a,2),1);
    end
    if (min(size(a) > 1))
      error('2D cell arrays of datasets not supported')
    end

    % Now call this function for each combination of
    % dataset A{I} and mappings W{I,J}.
    out1 = cell(numel(a),size(w,2));
    out2 = out1;
    for i=1:numel(a)
      for j=1:size(w,2)
        [out1{i,j},out2{i,j}] = feval(mfilename,a{i}*w{i,j},type,label);
      end
    end

  else

    % Assert that the second argument is a trained mapping, and call
    % this function on the mapped data.
    ismapping(w);
    istrained(w);
    [out1,out2] = feval(mfilename,a*w,type,label);

  end

  % If there are no output arguments, display the error(s) calculated.
  % Otherwise, copy the calculated errors to the output arguments.
  if (nargout == 0) & (nargin > 0)
    if (iscell(a))
      if (nargin == 1) || (isempty(w) && isempty(type) && isempty(label))
        for j1 = 1:size(a,1)
          for j2 = 1:size(a,2)
            fprintf(1,'%6.4f %s on %15s\n',out1(j1,j2),...
                    getname(a{j1,j2}),getuser(a{j1,j2},'evaluated_by'));
            %disp(['Mean classification error on ' ...
            %num2str(size(a{j1,j2},1)) ' test objects: ' num2str(out1(j1,j2))]);
          end
        end
      else
        fprintf('\n Test results for');
        disperror(a,w(1,:),cell2mat(out1));
      end
    else
      if (nargin == 1)
        disp(['Mean classification error on ' num2str(size(a,1)) ...
              ' test objects: ' num2str(out1)])
      else
        if ~isempty(w)
          %DXD empty mapping can happen after a*w*testc
          fprintf(' %s',getname(w,20));
        else
          %DXD is this a good alternative?
          fprintf(' %s',getname(a,20));
        end
        fprintf(' %5.3f',out1);
        fprintf('\n');
      end
    end
  else
    OUT1 = out1;
    OUT2 = out2;
  end

return

%TESTAUC Multiclass error area under the ROC
%
%   E = TESTAUC(A*W)
%   E = TESTAUC(A,W)
%   E = A*W*TESTAUC
%
% INPUT
%   A  Dataset to be classified
%   W  Classifier
%
% OUTPUT
%   E  Error, area under the ROC
%
% DESCRIPTION
% The area under the error ROC is computed for the dataset A w.r.t. the
% classifier W. The estimator is based on a rank analysis of the
% classifier outcomes. Ties are broken by a two-way sorting and averaging.
%
% The multi-class situation is solved by averaging over all outcomes of
% the one-against-rest ROCs.
%
% Note that E is an error and not a performance measure like the AUC often
% used in the literature.
%
% SEE ALSO (PRTools Guide)
% DATASETS, MAPPINGS, TESTC, PRROC

% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function e = testauc(a,w,n)

  if nargin < 3, n = []; end
  if nargin < 2, w = []; end   % guard was missing: w was undefined for nargin == 0
  if nargin < 1, a = []; end   % guard was missing: a was undefined for nargin == 0
  if (nargin == 0) | (isempty(a))
    % No input arguments given: return mapping information.
    % ({label} in the original was undefined here; the remaining
    % parameters w and n are stored instead)
    e = prmapping(mfilename,'fixed',{w,n});
    e = setbatch(e,0);
    return
  elseif (nargin == 1 | isempty(w))
    % Classification matrix already computed
    d = a;
  else
    % Compute classification matrix now
    d = a*w;
  end

  [m,k,c] = getsize(d);
  s = classsizes(d);

  if k == 1
    % classifier with a single class outcome, make two for consistency
    d = [d 1-d];
    k = 2;
  end

  if isempty(n)
    e = zeros(1,c);
    for j = 1:c
      e(j) = auc_one(d,s,j);
    end
    e = e*getprior(d)';
  else
    e = auc_one(d,s,n);
  end

return
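% AUC_ONE computes the one-against-rest ROC area for class J from ranks:
% with the M objects sorted on descending class-J output, the number of
% wrongly ordered (non-target above target) pairs equals the sum of the
% ranks of the S(J) target objects minus S(J)*(S(J)+1)/2. This is done for
% a forward and a backward sorting and averaged to break ties; dividing by
% S(J)*(M-S(J)), the total number of target / non-target pairs, yields the
% error version of the AUC in [0,1] (0 is a perfect ranking).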
function e = auc_one(d,s,j)

  % compute AUC for class j versus rest
  m = size(d,1);
  lablist = getlablist(d);   % class names
  n = findfeatlab(d,lablist(j,:));
  % forward sorting
  [ds,J1] = sort(-d(:,n));
  [j1,K1] = sort(J1);
  % backward sorting to solve ties
  [ds,J2] = sort(flipud(-d(:,n)));
  [j2,K2] = sort(J2);
  K2 = flipud(K2);
  % get all object indices for this class
  K = findnlab(d,j);
  % retrieve number of wrong pairs
  e = (sum(K1(K)) + sum(K2(K)) - (s(j)*(s(j)+1)))/2;
  % error contribution
  e = e / ((m-s(j))*s(j));

return

function confm = cmat(a)

  % simplified confusion matrix procedure, class order as in a.lablist
  % a should be a classification matrix with the same feature labels
  % (no doubles) as a.lablist
  lablist = getlablist(a);
  featlab = getfeatlab(a);
  N = getsize(a,3);
  flab = renumlab(featlab,lablist);
  nlab = getnlab(a);
  aa = +a;
  confm = zeros(N,N);
  for j=1:N
    J = find(nlab==j);
    [mx,K] = max(aa(J,:),[],2);
    confm(j,:) = histc(flab(K)',1:N);
  end
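% COMP_CRIT returns, per measure, the *sum* of per-sample contributions
% for the selected class; the caller normalizes afterwards (OUT1/CSIZES(N)
% and OUT2/(M-CSIZES(N)) in the LABEL branch above). Rates such as
% precision are therefore first scaled by the class size here and only
% divided back later. A small sketch with the illustrative
% CONFM2 = [8 2; 2 18] used earlier: 'precision' returns
% OUT1 = 8/(8+2)*10 = 8, which the caller divides by the class size 10 to
% give 0.8.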
function [out1,out2] = comp_crit(type,c,a,n,label)

  % c : 2 x 2 confusion matrix
  % a : classification data
  % n : relevant class
  switch type
    case 'crisp'
      out1 = c(1,2);
      out2 = c(2,1);
    case 'FN'
      out1 = c(1,2);
      out2 = c(2,1);                   % FP
    case 'TP'
      out1 = c(1,1);
      out2 = c(2,2);                   % TN
    case 'precision'
      out1 = c(1,1)/(c(1,1)+c(2,1));
      out1 = out1*(c(1,1)+c(1,2));     % sum of per sample contributions
      out2 = c(1,1)/(c(1,1)+c(1,2));   % recall (= sensitivity)
      out2 = out2*(c(2,2)+c(2,1));     % sum of per sample contributions
    case 'sensitivity'
      out1 = c(1,1)/(c(1,1)+c(1,2));
      out1 = out1*(c(1,1)+c(1,2));     % sum of per sample contributions
      out2 = c(2,2)/(c(2,1)+c(2,2));   % specificity
      out2 = out2*(c(2,2)+c(2,1));     % sum of per sample contributions
    case 'soft'
      % normalised difference between desired and real targets
      a = setlabtype(a,'soft')*classc;
      t = gettargets(a);
      k = findfeatlab(a,label);
      d = abs(+a(:,k) - t(:,n));
      J = find(isnan(d));
      d(J) = ones(size(J));
      out1 = sum(d)/2;   % needed for consistency as every error is counted twice
      %out1 = sum(d);
      out2 = [];
    case 'F'
      % Lissack and Fu error
      b = seldat(a,n)*classc;
      out1 = sum(1-max(+b,[],2));
      out2 = [];
    case {'mse','MSE'}
      k = findfeatlab(a,label);
      b = seldat(a,n);
      out1 = sum((+b(:,k)-gettargets(b)).^2);
      out2 = [];   % no second estimate defined (out2 was left unset originally)
    case {'nmse','NMSE'}
      % use normalised outputs
      k = findfeatlab(a,label);
      b = seldat(a,n)*classc;
      out1 = sum((+b(:,k)-gettargets(b)).^2);
      out2 = [];   % no second estimate defined (out2 was left unset originally)
    case {'auc','AUC'}
      out1 = testauc(a,[],n)*(c(1,1)+c(1,2));   % sum of per sample contributions
      out2 = [];
    otherwise
      error('Error / performance type not found')
  end
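% A hedged sketch of a target-class measure (illustrative call; assumes a
% two-class dataset with numeric labels 1 and 2, as e.g. generated by
% GENDATB):
%
%   % A = GENDATB; W = LDC(A);
%   % [P,R] = TESTC(A*W,'precision',1)   % precision and recall of class 1
%
% P is the precision for target class 1 and R its recall, following the
% normalization by the class sizes described above.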