%NUSVO Support Vector Optimizer: NU algorithm
%
%   [V,J,NU,C] = NUSVO(K,NLAB,NU,OPTIONS)
%
% INPUT
%   K       Similarity matrix
%   NLAB    Label list consisting of -1/+1
%   NU      Regularization parameter (0 < NU < 1): expected fraction of
%           support vectors (optional; default: 0.01)
%   OPTIONS
%    .PD_CHECK              force positive definiteness of the kernel by adding a small
%                           constant to the kernel diagonal (default: 1)
%    .BIAS_IN_ADMREG        it may happen that the bias of the SVC (the b term) is not defined;
%                           if BIAS_IN_ADMREG == 1, b will be taken from its admissible
%                           region (if the region is bounded, the midpoint will be used);
%                           if BIAS_IN_ADMREG == 0, the situation will be considered as
%                           an optimization failure and treated accordingly (default: 1)
%    .ALLOW_UB_BIAS_ADMREG  it may happen that the admissible region of the bias is unbounded;
%                           if ALLOW_UB_BIAS_ADMREG == 1, b will be heuristically taken
%                           from its admissible region, otherwise (ALLOW_UB_BIAS_ADMREG == 0)
%                           the situation will be considered as an optimization failure and
%                           treated accordingly (default: 1)
%    .PF_ON_FAILURE         if the optimization fails (the optimizer did not converge, or there
%                           are problems with finding the bias term) and PF_ON_FAILURE == 1,
%                           the Pseudo-Fisher classifier will be computed,
%                           otherwise (PF_ON_FAILURE == 0) an error will be issued (default: 1)
%
% OUTPUT
%   V       Vector of weights for the support vectors
%   J       Index vector pointing to the support vectors
%   NU      NU parameter (useful when NU was automatically selected)
%   C       C regularization parameter of the SVC algorithm which gives the same classifier
%
% DESCRIPTION
% A low-level routine that optimizes the set of support vectors for a 2-class
% classification problem based on the similarity matrix K computed from the
% training set. NUSVO is called directly from SVC_NU. The labels NLAB should
% indicate the two classes by +1 and -1. Optimization is done by quadratic
% programming, using the Matlab routine QUADPROG (or QP if QUADPROG is not
% available).
%
% NU is bounded from above by NU_MAX = 1 - ABS(Lp-Lm)/(Lp+Lm), where Lp (Lm)
% is the number of positive (negative) samples. If NU > NU_MAX is supplied to
% the routine, it will be changed to NU_MAX.
%
% If NU is smaller than some NU_MIN, which depends on the overlap between the
% classes, the algorithm will typically take a long time to converge (if it
% converges at all). It is therefore advisable to set NU larger than the
% expected overlap.
%
% The weights V are rescaled in such a manner as if they were returned by SVO
% with the regularization parameter C.
%
% SEE ALSO
% SVC_NU, SVO, SVC
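%
% EXAMPLE
% A minimal usage sketch (the data below is illustrative only):
%
%  A    = [randn(20,2)-1; randn(20,2)+1];  % two 2D classes
%  NLAB = [-ones(20,1); ones(20,1)];       % -1/+1 labels
%  K    = A*A';                            % linear kernel as similarity matrix
%  [V,J,NU,C] = NUSVO(K,NLAB,0.1);         % about 10% of the objects are expected to become SVs
%
% The first LENGTH(J) elements of V are the weights of the support objects,
% the last element of V is the bias term.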

% Copyright: S. Verzakov, s.verzakov@ewi.tudelft.nl
% Based on SVO.M by D.M.J. Tax, D. de Ridder, R.P.W. Duin
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% $Id: nusvo.m,v 1.3 2010/02/08 15:29:48 duin Exp $

function [v,J,nu,C] = nusvo_new(K,y,nu,Options,arg) % old call: K,y,nu,pd,abort_on_error

  prtrace(mfilename);

  % Old call conventions
  if nargin > 4 | (nargin == 4 & ~isstruct(Options) & isempty(Options))
    abort_on_error = [];
    if nargin == 5
      abort_on_error = arg;
    end

    pd = [];
    if nargin >= 4
      pd = Options;
    end

    clear Options
    Options.pd_check             = pd;
    Options.bias_in_admreg       = 1;
    Options.allow_ub_bias_admreg = 1;
    Options.pf_on_failure        = ~abort_on_error;
  end

  if nargin < 4
    Options = [];
  end

  DefOptions.pd_check             = 1;
  DefOptions.bias_in_admreg       = 1;
  DefOptions.allow_ub_bias_admreg = 1;
  DefOptions.pf_on_failure        = 1;

  Options = updstruct(DefOptions,Options,1);

  if nargin < 3
    prwarning(3,'The regularization parameter NU is not specified, assuming 0.01.');
    nu = 0.01;
  end

  nu_max = 1 - abs(nnz(y == 1) - nnz(y == -1))/length(y);
  nu_max = nu_max*(1-1e-6); % to be on the safe side

  if nu > nu_max
    prwarning(3,['nu==' num2str(nu) ' is not feasible; set to ' num2str(nu_max)]);
    nu = nu_max;
  end

  vmin = 1e-9; % Accuracy to determine when an object becomes a support object.

  % Set up the variables for the optimization.
  n  = size(K,1);
  D  = (y*y').*K;
  f  = zeros(1,n);
  A  = [y'; ones(1,n)];
  b  = [0; nu*n];
  lb = zeros(n,1);
  ub = ones(n,1);
  p  = rand(n,1);

  D = (D+D')/2; % guarantee symmetry

  if Options.pd_check
    % Make the kernel matrix K positive definite.
    i = -30;
    while (pd_check (D + (10.0^i) * eye(n)) == 0)
      i = i + 1;
    end
    if (i > -30)
      prwarning(2,'K is not positive definite. The kernel is regularized by adding 10.0^(%d)*I',i);
    end
    i = i+2;
    D = D + (10.0^(i)) * eye(n);
  end

  % Minimization procedure initialization:
  % 'qp' minimizes:   0.5 x' D x + f' x
  % subject to:       Ax <= b
  % (here both constraints are imposed as equalities)
  %
  % if (exist('qld') == 3)
  %   v = qld (D, f, -A, b, lb, ub, p, length(b));
  %
  %   v =[];
  %
  %   if (isempty(v) && exist('quadprog') == 2)
  %     opts = optimset('Algorithm','active-set','Display','off');
  %     v = quadprog(D, f, [], [], A, b, lb, ub, [], opts);
  %
  %     if isempty(v)
  %       opts = optimset('Algorithm','trust-region-reflective','Display','off');
  %       v = quadprog(D, f, [], [], A, b, lb, ub, [], opts);
  %     end
  %   end
  % elseif (exist('quadprog') == 2)
  if (exist('quadprog') == 2)
    % prwarning(1,'QLD not found, the Matlab routine QUADPROG is used instead.')
    % v = quadprog(D, f, [], [], A, b, lb, ub);
    opts = optimset('Algorithm','active-set','Display','off');
    v = quadprog(D, f, [], [], A, b, lb, ub, [], opts);
    if isempty(v)
      opts = optimset('Algorithm','trust-region-reflective','Display','off');
      v = quadprog(D, f, [], [], A, b, lb, ub, [], opts);
    end
  else
    prwarning(1,'QUADPROG not found, the Matlab routine QP is used instead.')
    verbos    = 0;
    negdef    = 0;
    normalize = 1;
    v = qp(D, f, A, b, lb, ub, p, length(b), verbos, negdef, normalize);
  end

  try
    % check if the optimizer returned anything
    if isempty(v)
      error('prtools:nusvo_new','Optimization did not converge.');
    end
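
    % For reference: with alpha_i = v(i)/n, the QP above is the standard nu-SVC
    % dual, i.e. minimize 0.5*sum_ij alpha_i*alpha_j*y_i*y_j*K(i,j) subject to
    % sum_i y_i*alpha_i = 0, sum_i alpha_i = nu (the usual constraint
    % sum_i alpha_i >= nu imposed as an equality) and 0 <= alpha_i <= 1/n.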

    % Find all the support vectors.
    J  = find(v > vmin);
    Jp = J(y(J) ==  1);
    Jm = J(y(J) == -1);

    % Sanity check: there must be support objects from both classes.
    if isempty(J)
      error('There are no support objects.');
    elseif isempty(Jp)
      error('There are no support objects from the positive class.');
    elseif isempty(Jm)
      error('There are no support objects from the negative class.');
    end

    % Find the SVs on the boundary.
    I  = J(v(J) < 1-vmin);
    Ip = I(y(I) ==  1);
    Im = I(y(I) == -1);

    % Include the class information into the object weights.
    v = y.*v;

    [rho, b] = FindRhoAndB(nu,y,K,v,J,Jp,Jm,Ip,Im,Options);

    v = [v(J); b]/rho;
    C = 1/rho;

  catch
    if Options.pf_on_failure
      prwarning(1,[lasterr ' Pseudo-Fisher is computed instead.']);
      n = size(K,1);
      %v = prpinv([K ones(n,1)])*y;
      v = prpinv([K ones(n,1); ones(1,n) 0])*[y; 0];
      J = [1:n]';
      C = nan;
    else
      rethrow(lasterror);
    end
  end

return;


function [rho, b] = FindRhoAndB(nu,y,K,v,J,Jp,Jm,Ip,Im,Options)

  % There are boundary support objects from both classes; we can use them to find the bias term.
  if ~isempty(Ip) & ~isempty(Im)
    % r1 = rho-b
    r1 = mean(K(Ip,J)*v(J));
    % r2 = rho+b
    r2 = -mean(K(Im,J)*v(J));

  else
    % Boundary support objects are absent in at least one of the classes.
    n = length(v);

    % non-SVs; we need them to restrict the admissible region from above
    J0 = (1:n)';
    J0(J) = [];
    J0p = J0(y(J0) ==  1);
    J0m = J0(y(J0) == -1);

    % Only margin-error SVs exist
    if isempty(Ip) & isempty(Im)
      % If only margin-error SVs are present, then necessarily
      % nSVp = nSVm and nu*n = nSVp + nSVm; it means that
      % rho and b should be in the region
      % lb1 <= rho-b <= ub1, lb2 <= rho+b <= ub2,
      % rho > 0 (rho = 0 corresponds to the zero-margin solution and cannot be
      % interpreted as a standard SVC)
      if length(Jp) ~= length(Jm)
        error('Inconsistency: only margin-error SVs exist and nSVp ~= nSVm.');
      elseif nu*n ~= length(J)
        error('Inconsistency: only margin-error SVs exist and nu*n ~= nSV.');
      elseif ~Options.bias_in_admreg
        error('The bias term is undefined.');
      end

      % Support objects always exist, so the region is always bounded from below;
      % if there are no non-SVs, the region is unbounded from above.

      % r1 = rho-b
      lb1 = max(K(Jp,J)*v(J));
      ub1 = min([K(J0p,J)*v(J); inf]);

      % r2 = rho+b
      lb2 = -min(K(Jm,J)*v(J));
      ub2 = -max([K(J0m,J)*v(J); -inf]);

      if lb1 > ub1 | lb2 > ub2
        error('prtools:nusvo_new','Admissible region of the bias term is empty.');
      end

      if isinf(ub1) | isinf(ub2)
        Msg = 'Admissible region of the bias term is unbounded';
        if Options.allow_ub_bias_admreg
          prwarning(1,Msg);
        else
          error('prtools:nusvo_new',Msg);
        end
      end

      % The admissible region is the part of the domain [lb1, ub1] x [lb2, ub2]
      % which lies above the line r2 = -r1.
      %
      % We put (r1,r2) on the rectangle diagonal connecting the upper right and
      % bottom left corners, halfway from the point of intersection between this
      % diagonal and the line r2 = -r1.
      %
      % First we make the rectangle bounded (put the upper right corner above the
      % line r2 = -r1).
      if isinf(ub1)
        ub1 = max(-lb2,lb1)+1;
      end
      if isinf(ub2)
        ub2 = max(-lb1,lb2)+1;
      end

      r1_intersection =  (ub2*lb1 - ub1*lb2)/(ub2-lb2 + ub1-lb1);
      r2_intersection = -r1_intersection;

      % It may happen that the intersection does not exist:
      %
      % 1) the whole domain is above the line r2 = -r1;
      %    then we use the bottom left corner instead of it;
      %
      % 2) the whole domain is below the line r2 = -r1, which means that there is
      %    no solution with a positive margin and the condition r1+r2 > 0 will be
      %    violated (we check it at the end).
      lb1 = max(lb1,r1_intersection);
      lb2 = max(lb2,r2_intersection);

      r1 = (lb1 + ub1)/2;
      r2 = (lb2 + ub2)/2;

    % Only margin-error SVs are present in the negative class
    elseif ~isempty(Ip) & isempty(Im)
      % r1 = rho-b
      r1 = mean(K(Ip,J)*v(J));

      % r2 = rho+b
      lb2 = -min(K(Jm,J)*v(J));
      ub2 = -max([K(J0m,J)*v(J); -inf]);

      if lb2 > ub2
        error('The admissible region of the bias term is empty.');
      end

      % we need to minimize (2*mSVm - nu*n)*r2
      % s.t. lb2 <= r2 <= ub2, r2 > -r1
      coeff_sign = 2*length(Jm) - nu*n;
      if coeff_sign > 0
        r2 = lb2; % put r2 at the lowest value; it has to be larger than -r1
      elseif coeff_sign < 0
        r2 = ub2;
        % coeff_sign < 0 means that nu*n > 2*mSVm.
        % Also, nu*n <= nu_max*n = n-abs(np-nm) = 2*min(np,nm), so
        % mSVm < min(np,nm) <= nm;
        % thus nm-mSVm = nonSVm+bSVm = nonSVm > 0 and ub2 < inf
      else % coeff_sign == 0
        if ~Options.bias_in_admreg
          error('The bias term is undefined.');
        end

        % correcting lb2 in such a way that rho >= 0 for (r1,lb2);
        % it may happen that lb2 > ub2, we check it later (r1+r2 > 0)
        lb2 = max(lb2,-r1);

        if isinf(ub2)
          Msg = 'Admissible region of the bias term is unbounded';
          if Options.allow_ub_bias_admreg
            prwarning(1,Msg);
            ub2 = lb2 + 1;
          else
            error(Msg);
          end
        end

        r2 = (lb2+ub2)/2;
      end

    % Only margin-error SVs are present in the positive class
    elseif isempty(Ip) & ~isempty(Im)
      % r1 = rho-b
      lb1 = max(K(Jp,J)*v(J));
      ub1 = min([K(J0p,J)*v(J); inf]);

      % r2 = rho+b
      r2 = -mean(K(Im,J)*v(J));

      if lb1 > ub1
        error('The admissible region of the bias term is empty.');
      end

      % we need to minimize (2*mSVp - nu*n)*r1
      % s.t. lb1 <= r1 <= ub1, r2 > -r1
      coeff_sign = 2*length(Jp) - nu*n;
      if coeff_sign > 0
        r1 = lb1; % put r1 at the lowest value; it has to be larger than -r2
      elseif coeff_sign < 0
        r1 = ub1;
        % coeff_sign < 0 means that nu*n > 2*mSVp.
        % Also, nu*n <= nu_max*n = n-abs(np-nm) = 2*min(np,nm), so
        % mSVp < min(np,nm) <= np;
        % thus np-mSVp = nonSVp+bSVp = nonSVp > 0 and ub1 < inf
      else % coeff_sign == 0
        if ~Options.bias_in_admreg
          error('The bias term is undefined.');
        end

        % correcting lb1 in such a way that rho >= 0 for (lb1,r2);
        % it may happen that lb1 > ub1, we check it later (r1+r2 > 0)
        lb1 = max(lb1,-r2);

        if isinf(ub1)
          Msg = 'The admissible region of the bias term is unbounded.';
          if Options.allow_ub_bias_admreg
            prwarning(1,Msg);
            ub1 = lb1 + 1;
          else
            error(Msg);
          end
        end

        r1 = (lb1+ub1)/2;
      end
    end

    if r1 + r2 <= 0
      error('A solution with a positive margin does not exist.');
    elseif isinf(r1) | isinf(r2)
      % This should never happen, because nu <= nu_max
      error('Contradiction detected: although nu <= nu_max, rho (the functional margin size) is infinite.');
    end
  end

  rho = (r2+r1)/2;
  b   = (r2-r1)/2;

return
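
% For illustration (a sketch, not part of the NUSVO interface): given the
% outputs of NUSVO and a row vector Kx with the similarities between a new
% object and the n training objects, the (scaled) SVC decision function is
%
%   f = Kx(J)*V(1:end-1) + V(end);
%
% and the object is assigned to the class sign(f).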