function C=hclust(data)
% HCLUST  Group-average agglomerative clustering of one-dimensional data.
%
%   C = HCLUST(DATA) starts with every sample as its own cluster and
%   repeatedly merges the two active clusters whose means are closest,
%   until a single cluster remains.
%
%   Input:
%     DATA - numeric vector (row or column) of L samples.
%
%   Output:
%     C - (2L-1)-by-3 repository of clusters, one row per cluster, with
%         columns [mean, variance, count]. Rows 1..L are the singleton
%         clusters in input order (variance 0, count 1). Rows L+1..2L-1
%         are the merged clusters in creation order, so the last row
%         describes the whole data set. Variance is the population
%         variance (normalised by the count). Empty DATA gives a 0-by-3
%         matrix.
%
%   Errors:
%     hclust:invalidInput - DATA is non-empty and not a vector.

if isempty(data)
    C = zeros(0,3);
    return;
end
if ~isvector(data)
    error('hclust:invalidInput', 'hclust expects a vector of samples.');
end

data = data(:);                 % accept row vectors as well as columns
L = numel(data);

% Active cluster list, one row per cluster: [mean, variance, count].
c = [data, zeros(L,1), ones(L,1)];

% Repository: the L singletons followed by room for the L-1 merges.
C = [c; zeros(L-1,3)];
j = L + 1;                      % next free row in the repository

while size(c,1) > 1             % more than one cluster still active
    % Order clusters by mean so that the closest pair is always adjacent.
    [unused, order] = sort(c(:,1)); %#ok<ASGLU>
    c = c(order,:);

    % Index of the first cluster of the closest adjacent pair
    % (the first such pair wins on ties).
    [unused, e] = min(abs(diff(c(:,1)))); %#ok<ASGLU>

    pairMean  = c(e:e+1,1);
    pairVar   = c(e:e+1,2);
    pairCount = c(e:e+1,3);
    total     = c(e,3) + c(e+1,3);

    % Size-weighted mean of the two clusters.
    mu = (c(e,1)*c(e,3) + c(e+1,1)*c(e+1,3)) / total;

    % Pooled population variance: each cluster contributes its own
    % variance plus the squared offset of its mean from the merged mean,
    % weighted by its size. Equal weights would be wrong whenever the two
    % clusters differ in size.
    sigma2 = sum(pairCount .* (pairVar + (pairMean - mu).^2)) / total;

    merged = [mu, sigma2, total];

    % Replace the pair in the active list by the merged cluster.
    c(e,:)   = merged;
    c(e+1,:) = [];

    % Record the merged cluster in the repository.
    C(j,:) = merged;
    j = j + 1;
end