%GTM Trainable mapping fitting a Generative Topographic Mapping by EM % % [W,L] = GTM (A,K,M,MAPTYPE,REG,EPS,MAXITER) % % INPUT % A Dataset or double matrix % K Vector containing number of nodes per dimension (default: [5 5], 2D map) % M Vector containing number of basis functions per dimension (default: [10 10]) % MAPTYPE Map onto mean of posterior ('mean', default) or mode ('mode') % REG Regularisation (default: 0) % EPS Change in likelihood to stop training (default: 1e-5) % MAXITER Maximum number of iterations (default: inf) % % OUTPUT % W GTM mapping % L Likelihood % % DESCRIPTION % Trains a Generative Topographic Mapping of any dimension, using the EM % algorithm. % % REFERENCES % Bishop, C.M., Svensen, M. and Williams, C.K.I., "GTM: The Generative % Topographic Mapping", Neural Computation 10(1):215-234, 1998. % % SEE ALSO (PRTools Guide) % PLOTGTM, SOM, PRPLOTSOM % (c) Dick de Ridder, 2003 % Information & Communication Theory Group % Faculty of Electrical Engineering, Mathematics and Computer Science % Delft University of Technology, Mekelweg 4, 2628 CD Delft, The Netherlands function [w,L] = gtm (a, arg2, M, mapto, reg, epsilon, maxiter) step = 10; if (nargin < 2) prwarning(3,'number of nodes K not given, assuming [5 5] (2D map)'); arg2 = [5 5]; end; if (~ismapping(arg2)) if (nargin < 7) maxiter = inf; end; if (nargin < 6) prwarning(3,'stopping criteria not given, assuming EPSILON = 1e-10'); epsilon = 1e-10; end; if (nargin < 5) prwarning(3,'regularisation REG not given, assuming 0'); reg = 0; end; if (nargin < 4) prwarning(3,'mapping type MAPTO not given, assuming mean'); mapto = 'mean'; end; if (nargin < 3) prwarning(3,'number of basis functions M not given, assuming 10'); M = 5; end; end; if (nargin == 0) | isempty(a) W = prmapping(mfilename); W = setname(W,'GTM'); return; end % Prevent annoying messages: we will tell the user about any problems. warning off; a = testdatasize(a); t = +a'; [d,N] = size(t); [m,k] = size(a); % If we're to apply a trained mapping, unpack its parameters. if (ismapping(arg2)) w = arg2; data = getdata(w); K = data{1}; M = data{2}; W = data{3}; sigma = data{4}; mapto = data{5}; else K = arg2; end; % Create a KK-dimensional grid with dimensions K(1), K(2), ... of % D-dimensional grid points and store it as a D x KK matrix X. Do the % same for the basis function centers PHI_MU, with grid dimensions M(1), ... K = reshape(K,1,prod(size(K))); % Turn into vectors. M = reshape(M,1,prod(size(M))); % Check: either K or M should be a vector, or both should be vectors of % the same length. if (length(K) > 1) & (length(M) == 1) M = M * ones(1,length(K)); elseif (length(M) > 1) & (length(K) == 1) K = K * ones(1,length(M)); elseif (length(K) ~= length(M)) error ('Length of vectors K and M should be equal, or K and/or M should be scalar.'); end; D = length(K); KK = prod(K); MM = prod(M); if (D > d) error ('Length of vectors K and M should be <= the data dimensionality.'); end; x = makegrid(K,D); % Grid points. phi_mu = makegrid(M,D); % Basis function centers. phi_sigma = 2/(mean(M)-1); % Basis function widths. % Pre-calculate Phi. for j = 1:KK for i = 1:MM Phi(i,j) = exp(-(x(:,j)-phi_mu(:,i))'*(x(:,j)-phi_mu(:,i))/(phi_sigma^2)); end; end; if (~ismapping(arg2)) % Train mapping. tries = 0; retry = 1; while ((retry == 1) & (tries <= 5)) % Initialisation. ptx = zeros(KK,N); R = (1/KK)*ones(KK,N); C = prcov(t'); [U,DD] = preig(C); [dummy,ind] = sort(-diag(DD)); W = U(:,ind(1:D))*x*prpinv(Phi); if (size(C,1) > D) sigma = sqrt(DD(D+1,D+1)); else sigma = 1/mean(M); end; done = 0; iter = 0; retry = 0; likelihood = 1.0e20; while ((~done) & (~retry)) iter = iter + 1; done = 1; factor1 = (1/(2*pi*sigma^2))^(d/2); factor2 = (-1/(2*sigma^2)); WPhi = W*Phi; for i = 1:KK ptx(i,:) = factor1 * exp(factor2*sum((WPhi(:,i)*ones(1,N)-t).^2)); end; if (~retry) s = sum(ptx); s(find(s==0)) = realmin; % Prevent divide-by-zero. R = ptx ./ (ones(size(ptx,1),1)*s); G = diag(sum(R')); % M-step #1 (eqn. 12) W = (prinv(Phi*G*Phi' + reg*eye(MM))*Phi*R*t')'; % M-step #1 (eqn. 13) s = 0; WPhi = W*Phi; for i = 1:KK s = s + sum(R(i,:).*sum((WPhi(:,i)*ones(1,N)-t).^2)); end; sigma = sqrt((1/(N*d)) * s); % Re-calculate log-likelihood. prev_likelihood = likelihood; likelihood = sum(log(mean(ptx)+realmin)); if (rem (iter,step) == 0) prwarning (10, sprintf('[%3d] L: %2.2f (change: %2.2e)', ... iter, likelihood, abs ((likelihood - prev_likelihood)/likelihood))); end; % Continue? done = (abs ((likelihood - prev_likelihood)/likelihood) < epsilon); done = (done | (iter > maxiter)); if (~isfinite(likelihood)) prwarning(3,'Problem is poorly conditioned, retrying'); retry = 1; tries = tries + 1; end; end; end; end; L = likelihood; if (~retry) w = prmapping(mfilename,'trained',{K,M,W,sigma,mapto},[],k,D); w = setname(w,'GTM'); else prwarning(3,'Problem is too poorly conditioned, giving up'); prwarning(3,'Consider lowering K and M or increasing REG'); w = []; end; else % Apply mapping. factor1 = (1/(2*pi*sigma^2))^(d/2); factor2 = (-1/(2*sigma^2)); WPhi = W*Phi; for i = 1:KK ptx(i,:) = factor1 * exp(factor2*sum((WPhi(:,i)*ones(1,N)-t).^2)); end; s = sum(ptx); s(find(s==0)) = realmin; % Prevent divide-by-zero. R = ptx ./ (ones(size(ptx,1),1)*s); switch(mapto) case 'mean', out = (x*R)'; case 'mode', [dummy,ind] = max(R); out = x(:,ind)'; otherwise, error ('unknown mapping type: should be mean or mode') end; w = setdata(a,out,getlabels(w)); end; warning on; return % GRID = MAKEGRID (K,D) % % Create a KK = prod(K)-dimensional grid with dimensions K(1), K(2), ... of % D-dimensional uniformly spaced grid points on [0,1]^prod(K), and store it % as a D x KK matrix X. function grid = makegrid (K,D) KK = prod(K); for h = 1:D xx{h} = 0:(1/(K(h)-1)):1; % Support point means end; % Do that voodoo / that you do / so well... if (D==1) xm = xx; else cmd = '['; for h = 1:D-1, cmd = sprintf ('%sxm{%d}, ', cmd, h); end; cmd = sprintf ('%sxm{%d}] = ndgrid(', cmd, D); for h = 1:D-1, cmd = sprintf ('%sxx{%d}, ', cmd, h); end; cmd = sprintf ('%sxx{%d});', cmd, D); eval(cmd); end; cmd = 'mm = zeros(D, '; for h = 1:D-1, cmd = sprintf ('%s%d, ', cmd, K(h)); end; cmd = sprintf ('%s%d);', cmd, K(D)); eval (cmd); for h = 1:D cmd = sprintf ('mm(%d,', h); for g = 1:D-1, cmd = sprintf ('%s:,', cmd); end; cmd = sprintf ('%s:) = xm{%d};', cmd, h); eval (cmd); end; grid = reshape(mm,D,KK); return