% SVD analysis of the yeast expression matrix.
%
% Steps:
%   1. Load the data set and count missing (NaN) values per row.
%   2. Keep only the rows with no missing values.
%   3. Compute the economy-size SVD, the fraction of squared singular
%      values carried by each component, and the normalized entropy.
%   4. Draw a three-panel figure: raster plot of V', bar chart of the
%      fractions, and the first two columns of V.
%
% Relies on helpers that are not defined in this file: rasterplot,
% redgreen and xticklabel_rotate must be on the MATLAB path.

% Clear all previous variables
clear;
format compact

% Load the yeast data. The code below reads the variables data, geneNames
% and expNames, so the MAT-file is expected to provide them.
load /home/lom/tmp/doublecheck_gsvd/yeast.mat
[nGenes, nExps] = size(data)    % left unterminated on purpose: echoes the size

% Count and locate NaN data.
% nanPerGene(k) is the number of NaN entries in row k of data; it is
% computed once and reused for the histogram and for the row filter.
nanPerGene = sum(isnan(data), 2);

% nNanPerExp(i+1) is the number of rows having exactly i NaN entries.
nNanPerExp = zeros(nExps, 1);
for i = 0:nExps-1
    nNanPerExp(i+1) = sum(nanPerGene == i);
end

% Create and display the number of missing values
bar(0:nExps-1, nNanPerExp, 'r');
axis([-.5 3.5 0 3000])          % fixed view: 0..3 missing values, up to 3000 rows
xlabel('Number of Arrays')
ylabel('Number of Genes')

% Take only the rows without missing values
idx = nanPerGene == 0;
fullmatrix = data(idx,:);
fullgeneNames = geneNames(idx,:);

% Calculate the economy-size SVD
[U, S, V] = svd(fullmatrix, 0);
S = diag(S)'                    % singular values as a row vector (echoed)
fractions = S.^2/sum(S.^2);

% Normalized entropy of the fractions. 0*log(0) evaluates to NaN in MATLAB,
% so zero fractions (rank-deficient input) are left out of the sum; they
% contribute nothing to the entropy.
nz = fractions > 0;
entropy = -sum(fractions(nz).*log(fractions(nz)))/log(nExps)

% Open a new figure for the three-panel plot. Calling subplot in the figure
% that holds the missing-value bar chart would delete that chart's axes,
% because they overlap the new subplot axes.
figure

% Plot bar chart of the fractions
subplot(1,3,2);
barh(1:nExps, fractions, 'r')
title('(b) Expresion Fraction')

% Inset: zoom on components 3..12 (clamped to the number of components so
% the indexing cannot run past the end of fractions).
insetIdx = 3:min(12, nExps);
if ~isempty(insetIdx)
    axes('position', [.45 .3 .16 .62])
    barh(insetIdx, fractions(insetIdx), 'r')
    set(gca, 'Color', [1 1 .5])
end

% Plot raster plot; underscores in the labels are replaced by spaces first
expNames(expNames=='_') = ' ';
subplot(1,3,1);
rasterplot(V', expNames)
title('(a) Arrays')
colormap(redgreen)
xticklabel_rotate

% Plot the first two columns of V against the array index
subplot(1,3,3);
plot(V(:,1:2), '-o');
title('(c) Arrays')
ylabel('Expression Level')
set(gca, 'XTick', 1:nExps, 'XTickLabel', expNames)
xticklabel_rotate