function toccs(dtm, classid, name)
% TOCCS  Write a document-term matrix to disk as a set of CCS text files.
%
%   toccs(dtm, classid, name)
%
%   Inputs:
%     dtm     - n-by-d (typically sparse) document-term matrix; each row is
%               a document and each column is a dimension (term).
%     classid - class labels, starting from 1 in Matlab; they are written
%               0-based (classid-1).
%     name    - file name prefix; every output file is [name '_<suffix>'].
%
%   Files written (all plain text, one value per line unless noted):
%     <name>_col_ccs  0-based column pointers of the weighted d-by-n matrix
%                     (n+1 entries, one column per document).
%     <name>_row_ccs  0-based row (term) index of every nonzero.
%     <name>_tfn_nz   nonzero values of unitnorm(logidf(dtm),2)'.
%     <name>_txx_nz   nonzero values of dtm' (the unweighted matrix).
%     <name>_pattern  first line "n 1", then classid-1, one label per line.
%     <name>_dim      single line "d n nnz", nnz being the nonzero count of dtm.
%
%   Relies on the external helpers LOGIDF and UNITNORM, which are not
%   defined in this file (presumably log-IDF weighting followed by L2
%   normalisation -- confirm against their definitions).

[n, d] = size(dtm);

% Raw nonzero values in column-major order of the d-by-n transposed matrix.
xx = nonzeros(dtm');

% Weighted matrix, transposed so that each column is one document.
dat = unitnorm(logidf(dtm), 2)';
[termIdx, docIdx, s] = find(dat); %#ok<ASGLU>

% Column pointers as a running total of the nonzeros per column. Unlike a
% diff/find over the column indices, this keeps one entry for every
% document even when a document has no nonzeros, and it does not depend on
% whether FIND returned row or column vectors.
nnzPerDoc = full(sum(dat ~= 0, 1));
colPtr = [0; cumsum(nnzPerDoc(:))];

% The raw values are stored against the index files built from the
% weighted matrix, so both must share one sparsity pattern.
if numel(xx) ~= numel(s)
    warning('toccs:patternMismatch', ...
        ['dtm has %d nonzeros but the weighted matrix has %d; ' ...
         '%s_txx_nz will not line up with %s_row_ccs/%s_col_ccs.'], ...
        numel(xx), numel(s), name, name, name);
end

fid = open_for_writing([name '_col_ccs']);
fprintf(fid, '%d\n', colPtr);
fclose(fid);

fid = open_for_writing([name '_row_ccs']);
fprintf(fid, '%d\n', termIdx - 1);   % 0-based row indices
fclose(fid);

% NOTE: with '%d', MATLAB prints non-integer values in exponential (%e)
% notation; the format is kept unchanged so existing readers still work.
fid = open_for_writing([name '_tfn_nz']);
fprintf(fid, '%d\n', s);
fclose(fid);

fid = open_for_writing([name '_txx_nz']);
fprintf(fid, '%d\n', xx);
fclose(fid);

fid = open_for_writing([name '_pattern']);
fprintf(fid, '%d %d\n', n, 1);
fprintf(fid, '%d\n', classid - 1);   % 0-based class labels
fclose(fid);

fid = open_for_writing([name '_dim']);
fprintf(fid, '%d %d %d\n', d, n, length(xx));
fclose(fid);
return;


function fid = open_for_writing(filename)
% Open FILENAME for writing; raise a descriptive error if it cannot be opened.
[fid, msg] = fopen(filename, 'w');
if fid < 0
    error('toccs:fopenFailed', ...
        'toccs: cannot open ''%s'' for writing: %s', filename, msg);
end
return;