function B = teach_log_reg(X,Y,R,Lam)
% TEACH_LOG_REG  Fit binary logistic regression parameters to data.
%   X   : N-by-D feature matrix (NaNs are treated as missing and zeroed)
%   Y   : N-by-1 labels in {-1,+1}
%   R   : N-by-D mask/weight matrix; R(n,d) ~= 0 marks an observed entry
%   Lam : L2 regularization weight
%   B   : N-by-2 class probabilities, B(n,:) = [P(y=-1|x_n), P(y=+1|x_n)]

X(isnan(X)) = 0;            % zero out missing feature values
N = size(X,1);
D = size(X,2);

% parameter vector x = [w; v; b], 2*D+1 entries
w  = 0.01*ones(D,1);
v  = 0.01*ones(D,1);
b  = 0.01;
x0 = [w; v; b];

% fit with the conjugate-gradient routine below
x = cg2(x0);
B = prob_dens_f(x);

% Optional comparison runs against MATLAB's built-in optimizers
% (they do not affect the returned B):
%
% % derivative-free simplex search
% tic
% [x2,fval,exitflag] = fminsearch(@(x) objFunc(x), x0, ...
%     optimset('TolX',1e-7,'MaxFunEvals',1e5,'MaxIter',1e5));
% t(1) = toc
%
% % quasi-Newton search using the analytic gradient
% [x3,fval] = fminunc(@myfun, x0, ...
%     optimset('GradObj','on','TolX',1e-5,'MaxFunEvals',1e4,'MaxIter',1e4));

    function [f,g] = myfun(x)
        % objective and gradient packaged for fminunc
        f = objFunc(x);
        if nargout > 1
            g = gradFunc(x);
        end
    end

    function B = prob_dens_f(x)
        % class probabilities under the fitted model
        D_ = size(X,2);
        N_ = size(X,1);
        B  = zeros(N_,2);
        w_ = x(1:D_);
        v_ = x(D_+1:2*D_);
        b_ = x(2*D_+1);
        for n = 1:N_
            summa  = R(n,:)*(X(n,:)'.*w_ + v_);   % bias-free activation for row n
            B(n,2) = 1/(1+exp(-(b_+summa)));      % P(y = +1 | x_n)
            B(n,1) = 1 - B(n,2);
        end
    end
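    % Finite-difference sanity check for gradFunc. This is a sketch
    % reconstructed from the commented-out debug snippet in the original
    % (which perturbed coordinate 30 by 0.01); the step size h = 1e-6 and
    % the choice of five probed coordinates are assumptions. It is not
    % called during fitting -- invoke it manually, e.g. checkGradient(x0),
    % when debugging the analytic gradient.
    function checkGradient(x)
        h   = 1e-6;                                   % central-difference step (assumed)
        g   = gradFunc(x);
        idx = round(linspace(1, length(x), min(5, length(x))));
        for k = idx
            e      = zeros(size(x));
            e(k)   = h;
            gFD    = (objFunc(x+e) - objFunc(x-e)) / (2*h);
            fprintf('coord %d: analytic %.6g, finite-diff %.6g\n', k, g(k), gFD);
        end
    end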
    % Conjugate-gradient minimization of objFunc.
    % Implementation follows Nocedal & Wright, Section 5.2, Algorithm 5.4
    % (FR-CG); the Polak-Ribiere and Hestenes-Stiefel beta updates come from
    % the same section. Note: Rasmussen's minimize uses a Polak-Ribiere
    % update with an inexact line search ->
    % http://www.kyb.tuebingen.mpg.de/bs/people/carl/code/minimize/
    function x = cg2(x0)
        x = x0;
        mode       = 'polak';   % 'fletcher', 'hestenes', 'polak' or 'none'
        linesearch = 'test2';   % 'armijo', 'exact', 'quadratic', 'test' or 'test2'

        objFuncValue    = objFunc(x);
        oldObjFuncValue = objFuncValue*0.1 + 1;   % dummy value so the loop is entered

        dx  = gradFunc(x);
        dir = -dx/norm(dx);

        iter      = 0;
        numOfIter = 1000;
        prec      = 1e-8;

        % converge when the relative change in the objective or the gradient
        % norm drops below prec, or the iteration cap is reached
        while iter < numOfIter && abs((oldObjFuncValue-objFuncValue)/objFuncValue) > prec && norm(dx) > prec

            iter = iter + 1;

            if strcmp(linesearch,'armijo')
                % external helper, not defined in this file
                alpha = mb_backtrackingLineSearch(@objFunc,objFuncValue,x,dx,dir);
            elseif strcmp(linesearch,'exact')
                maxStepLength  = 2;
                linesearchFunc = @(delta) objFunc(x+delta*dir);
                alpha = fminbnd(linesearchFunc,0,maxStepLength);  % built-in 1-d minimization
            elseif strcmp(linesearch,'quadratic')
                % external helper, not defined in this file
                alpha = mb_quadraticApproximationLineSearch(@objFunc,objFuncValue,x,dx,dir);
            elseif strcmp(linesearch,'test')
                % brute-force grid search over a step range that shrinks with iter
                alphas = 1e-10:(.1*10^(-2))/(2*iter):(.5*10^(-1))/(2*iter);
                value  = zeros(size(alphas));
                for alpha_idx = 1:length(alphas)
                    delta = alphas(alpha_idx);
                    value(alpha_idx) = objFunc(x+delta*dir);
                end
                [~,I] = min(value);
                alpha = alphas(I);
            elseif strcmp(linesearch,'test2')
                % bounded 1-d minimization over a step range that shrinks with iter
                maxStepLength  = (1*10^(-1))/(2*iter);
                linesearchFunc = @(delta) objFunc(x+delta*dir);
                alpha = fminbnd(linesearchFunc,0,maxStepLength);
                if alpha == maxStepLength
                    disp('warning: line search hit the maximum step length')
                end
            end

            % take the step
            x = x + alpha*dir;

            % update objective values
            oldObjFuncValue = objFuncValue;
            objFuncValue    = objFunc(x);

            % update gradient; note the normalization below makes the
            % Fletcher-Reeves ratio identically 1, so 'fletcher' mode
            % degenerates -- the default 'polak' update remains informative
            oldDx = dx;
            dx    = gradFunc(x);
            dx    = dx./norm(dx);

            if strcmp(mode,'fletcher')
                beta = (dx'*dx)/(oldDx'*oldDx);
            elseif strcmp(mode,'hestenes')
                beta = (dx'*(dx-oldDx))/((dx-oldDx)'*dir);
            elseif strcmp(mode,'polak')
                beta = (dx'*(dx-oldDx))/(oldDx'*oldDx);
            elseif strcmp(mode,'none')
                beta = 0;
            end

            % update search direction
            dir = -dx + beta*dir;

            fprintf(['Iteration ' num2str(iter) ' - Obj Func = ' num2str(objFuncValue) ...
                ' @ x = [' num2str(x') ']\n']);
        end
    end
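    % Vectorized equivalents of objFunc/gradFunc below. A sketch, not used
    % by the fit: it assumes Y is an N-by-1 vector of +/-1 labels (as the
    % loss log(1+exp(-Y(n)*s_n)) implies) and that R is N-by-D, so the
    % per-row activation s = b + (R.*X)*w + R*v replaces the loops over n
    % and d. These avoid gradFunc's O(N*D^2) recomputation of s.
    function f = objFuncVec(x)
        w_ = x(1:D); v_ = x(D+1:2*D); b_ = x(2*D+1);
        s  = b_ + (R.*X)*w_ + R*v_;                % N-by-1 activations
        f  = sum(log(1+exp(-Y.*s))) + Lam*(w_'*w_ + v_'*v_);
    end

    function g = gradFuncVec(x)
        w_ = x(1:D); v_ = x(D+1:2*D); b_ = x(2*D+1);
        s  = b_ + (R.*X)*w_ + R*v_;                % N-by-1 activations
        c  = -Y ./ (1+exp(Y.*s));                  % dLoss/ds, per row
        g  = [(R.*X)'*c + 2*Lam*w_;                % d/dw
              R'*c      + 2*Lam*v_;                % d/dv
              sum(c)];                             % d/db
    end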
    function f = objFunc(x)
        % regularized negative log-likelihood:
        %   sum_n log(1 + exp(-Y(n)*(b + s_n))) + Lam*(w'w + v'v)
        % where s_n = sum_d R(n,d)*(w(d)*X(n,d) + v(d))
        D_ = size(X,2);
        N_ = size(X,1);
        w_ = x(1:D_);
        v_ = x(D_+1:2*D_);
        b_ = x(2*D_+1);

        tot_summa = 0;
        for n = 1:N_
            summa = R(n,:)*(X(n,:)'.*w_ + v_);
            tot_summa = tot_summa + log(1+exp(-Y(n)*(b_+summa)));
        end

        f = tot_summa + Lam*(w_'*w_ + v_'*v_);
        % L1-regularized alternative:
        % f = tot_summa + Lam*N_*(norm(w_,1) + norm(v_,1));
    end

    function g = gradFunc(x)
        % analytic gradient of objFunc; entries 1..D are d/dw,
        % D+1..2D are d/dv, the last entry is d/db
        D_ = size(X,2);
        N_ = size(X,1);
        w_ = x(1:D_);
        v_ = x(D_+1:2*D_);
        b_ = x(2*D_+1);
        g  = zeros(size(x));

        for g_idx = 1:length(x)
            if g_idx <= length(w_)
                % d/dw(g_idx)
                tot_summa = 0;
                for n = 1:N_
                    summa1 = R(n,g_idx)*X(n,g_idx);
                    summa2 = R(n,:)*(X(n,:)'.*w_ + v_);
                    tot_summa = tot_summa + (-Y(n)*summa1*exp(-Y(n)*(b_+summa2)))/(1+exp(-Y(n)*(b_+summa2)));
                end
                g(g_idx) = tot_summa + 2*Lam*x(g_idx);
            elseif g_idx <= 2*length(w_)
                % d/dv(g_idx - D)
                tot_summa = 0;
                for n = 1:N_
                    summa1 = R(n,g_idx-length(w_));
                    summa2 = R(n,:)*(X(n,:)'.*w_ + v_);
                    tot_summa = tot_summa + (-Y(n)*summa1*exp(-Y(n)*(b_+summa2)))/(1+exp(-Y(n)*(b_+summa2)));
                end
                g(g_idx) = tot_summa + 2*Lam*x(g_idx);
            else
                % d/db
                tot_summa = 0;
                for n = 1:N_
                    summa2 = R(n,:)*(X(n,:)'.*w_ + v_);
                    tot_summa = tot_summa + (-Y(n)*exp(-Y(n)*(b_+summa2)))/(1+exp(-Y(n)*(b_+summa2)));
                end
                g(g_idx) = tot_summa;
            end
        end
    end
end
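% Example usage (a minimal sketch with synthetic data; the shapes and the
% +/-1 label convention are assumptions based on how X, Y and R are
% indexed above):
%
%   N = 200; D = 5;
%   X = randn(N, D);                          % features
%   R = double(rand(N, D) > 0.2);             % 0/1 mask of observed entries
%   w = randn(D, 1);
%   Y = sign((R.*X)*w + 0.1*randn(N, 1));     % labels in {-1, +1}
%   B = teach_log_reg(X, Y, R, 0.2);          % B(:,2) = P(y = +1 | x_n)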