%calculate MMD resulting from complete fragmentation of input MMD
%
%SYNTAX
%OUT1=chainfracture(IN1, IN2)
%   IN1 = initial MMD as [DP, CHAIN concentration]
%       DP must be incremented by 1 w/o gaps; for linear chains only it must start at 1.
%       IN1 is an
%           nx2 matrix for linear chains;
%           table for branched chains:
%               row name = topology descriptor (see Table 13)
%               variable named mass = nxm matrix of DPs for each individual segment of each chain of given topology
%               variable named frac =  concentration of each chain in the sample
%   IN2 = variance of the fragmentation distribution in DP,
%
%   OUT1 = MMD resulting from complete fragmentation in the same format as IN1
%       because fracture of branched chains generates chains of different
%       topologies, OUT1 will in general have multiple rows
%
% For chains with
%   <3 branching points, the macroradical is always located at the terminus of the rightmost arm
%   >3 branching points, OUT1 will contain first all chains w/ macroradical at the leftmost side-chain, then all chains w/ macroradicals
%       at the next branch, etc. Appearance of the 1st 0 in each column signifies change in the subset of macroradicals
%
% Apart from the radical-bearing arm, the arms of each branched chain are sorted in order of increasing size.
% 0 mass corresponds to the radical at the branch point
%
% The larger the chain, especially if it's branched, the more memory the code will require.
% To make the code portable, we removed system memory checks, which may make the code terminate with the "Out of memory" error. The simplest
% workaround is to rerun the code on a system with more memory; see the "Code for generating product distributions" section of the SI for other
% strategies to reduce the memory requirement. All results in the examples.mat file were generated on a PC with 32 Gb of RAM.
%
%
function fmmd=chainfracture(mmd, fragvar)
% CHAINFRACTURE  MMD produced by fragmenting every chain of the input MMD once.
%   mmd     - nx2 numeric matrix [DP, concentration] for linear chains, or a table with
%             variables 'mass' and 'frac' (cell columns) whose first row name is the
%             topology descriptor of a branched chain.
%   fragvar - divisor of the squared relative offset in the exponent of the
%             fragmentation distribution.
%   fmmd    - nx2 numeric matrix for linear input; otherwise a table with variables
%             'mass' and 'frac' and one row per product topology (plus 'linear').

% Warnings are silenced only for the duration of this call; the caller's warning
% state is restored when restorewarn is destroyed (normal return or error).
warnstate=warning;
restorewarn=onCleanup(@() warning(warnstate)); %#ok<NASGU>
warning('off', 'all');
% A table whose mass matrix has a single column describes linear chains: convert to [DP, frac].
if istable(mmd) && size(mmd.mass{1},2)==1, mmd=[double(mmd.mass{1}), mmd.frac{1}]; end

if isnumeric(mmd)
    
    % fragdist(i,j): weight of obtaining a fragment of DP i from a chain of DP j+1,
    % exp(-(1-i/((j+1)/2))^2/fragvar) for i<=j and 0 otherwise.
    fragdist=triu(exp(-triu(1-repmat((1:mmd(end,1)-1)', 1, mmd(end,1)-1)./repmat((1:.5:mmd(end,1)/2), mmd(end,1)-1,1)).^2/fragvar));
    % Normalize each column to unit sum (column 1 holds a single entry equal to 1 already).
    fragdist(:, 2:end)=fragdist(:, 2:end)./repmat(sum(fragdist(:,2:end)), mmd(end,1)-1,1);
    % Columns are rescaled to sum to 2 (presumably because each fracture yields two fragments).
    fragdist=2*fragdist;
    
    % mmd(2:end,2) must have mmd(end,1)-1 rows, i.e. the DP column has to run 1,2,...,max DP.
    fmmd=[(1:mmd(end,1)-1)', fragdist*mmd(2:end,2)];
    
else
    % cleanbranch is defined outside this file section.
    mmd=cleanbranch(mmd);
    name=mmd.Properties.RowNames{1};
    % xnum: the digit following each 'x' of the descriptor (1 when omitted); its length
    % selects the branch below (presumably the number of branch points).
    xnum=str2double(regexprep(regexp(name, 'x[2-9]?', 'match'), 'x', '', 'once')); xnum(isnan(xnum))=1;
    % inds: bookkeeping table, one row per set of mass columns along which chains are fractured.
    inds=table;
    fmmd=cell2table(cell(2,2), 'VariableNames', {'mass', 'frac'}, 'RowNames',{name; 'linear'});
    % Single 'x': only the last two mass columns are used.
    if length(xnum)==1, inds.segind={size(mmd.mass{1},2)-[1,0]};
    else
        % arms: the number following each 'p' of the descriptor (1 when omitted), with the
        % second 'p' token dropped.
        arms=str2double(regexprep(regexp(name, 'p[0-9]{0,}', 'match'), 'p', '')); arms(2)=[]; arms(isnan(arms))=1;
        if length(xnum)==2, inds.code=[1 2 102]';
            inds.flip{3}=size(mmd.mass{1},2);
            % flip: column permutation that mirrors a chain left<->right. For a symmetric
            % descriptor it is stored as 2 rows (destination; source) and the product keeps
            % the name of the input; otherwise a mirrored descriptor (fname) gets its own row.
            if arms(1)==arms(2) && xnum(1)==xnum(2),
                inds.flip{3}=[[1:arms(1), arms(1)+2:inds.flip{3}]; [arms(1)+2:inds.flip{3}, 1:arms(1)]];
            else
                inds.fname{3}=regexprep(['p', num2str(arms(2)), 'x', num2str(xnum(2)), 'px', num2str(xnum(1)), 'p', num2str(arms(1))], '(?<=[px])1(?=[px]|$)', '');
                fmmd(inds.fname{3},:)=cell(1,2);
                inds.flip{3}=[arms(1)+2:inds.flip{3}, arms(1)+1, 1:arms(1)];
            end
            % Column sets: last two of the first arms(1) columns; last two columns; and
            % columns arms(1), arms(1)+1 and the last column.
            inds.segind={arms(1)-[1,0]; size(mmd.mass{1},2)-[1,0]; [arms(1), arms(1)+1, size(mmd.mass{1},2)]};
            % Names of the two products formed when the break falls in column arms(1)+1.
            tname=regexprep(cellfun(@sprintf, {'p%ix%i', 'p%ix%i'}, {[arms(1)+1, xnum(1)], [arms(2)+1, xnum(2)]}, 'UniformOutput', 0), '(?<=[px])1(?=[px]|$)', '');
            inds.pname{3}=tname{1}; inds.pnamef{3}=tname{2};
            % spanl: per-chain total mass along each column set (columns are added in front below).
            spanl=sum(mmd.mass{1}(:, inds.segind{end}),2, 'native');
            
        elseif length(xnum)==3
            inds.code=[1 2 3 102 203 1003]';
            inds.flip{4}=size(mmd.mass{1},2);
            if arms(1)==arms(3) && xnum(1)==xnum(3),
                inds.flip{4}=[[1:arms(1)+1, arms(1)+arms(2)+1:inds.flip{4}]; [arms(1)+arms(2)+2:inds.flip{4},...
                    arms(1)+arms(2)+1, arms(1)+1, 1:arms(1)]];
            else
                inds.fname{4}=regexprep(sprintf('p%ix%ipx%ip%ix%ip%i', arms(3), xnum(3), xnum(2), arms(2), xnum(1), arms(1)),'(?<=[px])1(?=[px]|$)', '');
                fmmd(inds.fname{4},:)=cell(1,2);
                inds.fname{end}=inds.fname{4};
                inds.flip{4}=[inds.flip{4}-arms(3)+1:inds.flip{4}, inds.flip{4}-arms(3), arms(1)+2:arms(1)+arms(2), arms(1)+1, 1:arms(1)];
            end
            inds.flip{end}=inds.flip{4};
            
            % tname is 2x2 here: column 1 feeds pname, column 2 feeds pnamef for codes 102 and 203;
            % the code-1003 row stores whole rows of tname (cell inside cell).
            tname=regexprep(cellfun(@sprintf, {'p%ix%i', 'p%ix%ipx%ip%i'; 'p%ix%ipx%ip%i', 'p%ix%i'}, {[arms(1)+1, xnum(1)], [arms(3), xnum(3), xnum(2), arms(2)];...
                [arms(1), xnum(1), xnum(2), arms(2)], [arms(3)+1, xnum(3)]}, 'UniformOutput', 0), '(?<=[px])1(?=[px]|$)', '');
            inds.pname(4:5)=tname(:,1); inds.pnamef(4:5)=tname(:,2);
            inds.pname{inds.code==1003}=tname(1,:); inds.pnamef{end}=tname(2,:);
            
            % From here on arms(2) is one less than the value parsed from the descriptor.
            arms(2)=arms(2)-1;
            inds.segind={arms(1)-[1,0]; sum(arms(1:2))+[1,2]; size(mmd.mass{1},2)-[1,0]; [arms(1), arms(1)+1, sum(arms(1:2))+1];...
                [sum(arms(1:2))+1, sum(arms(1:2))+2,size(mmd.mass{1},2)]; [arms(1), arms(1)+1, sum(arms(1:2))+2,size(mmd.mass{1},2)]};
            spanl=sum(mmd.mass{1}(:, inds.segind{6}),2, 'native'); %left+right (3x)
            if arms(2)>0
                spanl=[sum(mmd.mass{1}(:, inds.segind{4}),2, 'native'), sum(mmd.mass{1}(:, inds.segind{5}),2, 'native'),  spanl]; %left+center (2x), center+right (2x),
            else, inds([2,4,5],:)=[];
                spanl=sum(mmd.mass{1}(:, inds.segind{3}),2, 'native');
            end
            if arms(3)>1, spanl=[sum(mmd.mass{1}(:, inds.segind{3}),2, 'native'), spanl];  else inds(inds.code==3,:)=[]; end
        end
        % Add a span column for a pair of arms when there is more than one arm; otherwise
        % drop the corresponding row of inds.
        if arms(2)>1, spanl=[sum(mmd.mass{1}(:, inds.segind{2}),2, 'native'), spanl]; else inds(2,:)=[]; end
        if arms(1)>1, spanl=[sum(mmd.mass{1}(:, inds.segind{1}),2, 'native'), spanl]; else inds(1,:)=[]; end
        
        tname=unique(tname);
        fmmd=[fmmd; cell2table(cell(length(tname),2), 'VariableNames', {'mass', 'frac'}, 'RowNames', tname)];
    end
    
    for i=1:height(inds),
        % mmdind: chains handled by row i - those whose i-th span equals the largest span of
        % the chain, excluding chains whose i-th span ties with an earlier column.
        if height(inds)==1, inds.mmdind{1}=(1:size(mmd.mass{1},1))';
        else inds.mmdind{i}=find(spanl(:, i)==max(spanl,[], 2));
            if i>1,  inds.mmdind{i}(any(spanl(inds.mmdind{i}, i)==spanl(inds.mmdind{i}, 1:i-1),2))=[]; end
        end
        if ~isempty(inds.mmdind{i})
            % Fragment each unique combination of the selected segment masses only once.
            [tmmd, tind]=sortrows(mmd.mass{1}(inds.mmdind{i}, inds.segind{i}), 1:length(inds.segind{i}));
            inds.mmdind{i}=inds.mmdind{i}(tind);
            [tmmd, ~, inds.uind{i}]=unique(tmmd, 'rows');
            [inds.fragout{i}, inds.fragdist{i}]=subfragmmd(tmmd, fragvar); clear tmmd
            
            % Group the full mass rows by unique segment combination, then replicate each
            % group once per break position returned by subfragmmd.
            tmass=mat2cell(mmd.mass{1}(inds.mmdind{i},:), accumarray(inds.uind{i}, ones(size(inds.mmdind{i}))), size(mmd.mass{1},2));
            tsize=cellfun('size', tmass, 1); tmass=cell2mat(tmass(inds.fragout{i}.pmassind));
            tfragout=repelem(inds.fragout{i}(:, {'ind', 'sbind'}), tsize(inds.fragout{i}.pmassind),1);
            
            % Breaks flagged in ind(:,1): shorten the last selected segment by the fragment size.
            tmass(tfragout.ind(:,1), inds.segind{i}(end))=tmass(tfragout.ind(:,1), inds.segind{i}(end))-tfragout.sbind(tfragout.ind(:,1));
            if size(tfragout.ind,2)==1
                % Two-segment case: the remaining breaks shorten the first selected segment,
                % whose column is then swapped with the last selected one.
                tmass(~tfragout.ind(:,1), inds.segind{i}(1))=tmass(~tfragout.ind(:,1), inds.segind{i}(1))-tfragout.sbind(~tfragout.ind(:,1));
                tmass(~tfragout.ind(:,1), inds.segind{i}([1,end]))=tmass(~tfragout.ind(:,1), inds.segind{i}([end,1]));
                tfracs=mat2cell(mmd.frac{1}(inds.mmdind{i}), accumarray(inds.uind{i}, ones(size(inds.mmdind{i}))), 1);
                tfracs=cell2mat(tfracs(inds.fragout{i}.pmassind)).*repelem(inds.fragout{i}.pfracs, tsize(inds.fragout{i}.pmassind),1);
                tfragout.ind=false(height(tfragout),1);
                [fmmd.mass{'linear'}, ~, uind]=unique([fmmd.mass{'linear'}; tfragout.sbind]);
            else
                % Three or more segments: only breaks flagged in ind(:,1) or ind(:,2) release
                % a linear fragment; ind(:,2) breaks shorten the first selected segment and
                % the chain is mirrored with the flip permutation.
                [fmmd.mass{'linear'}, ~, uind]=unique([fmmd.mass{'linear'}; tfragout.sbind(any(tfragout.ind(:,1:2),2))]);
                tmass(tfragout.ind(:,2), inds.segind{i}(1))=tmass(tfragout.ind(:,2), inds.segind{i}(1))-tfragout.sbind(tfragout.ind(:,2));
                if size(inds.flip{i},1)==2,
                    tmass(tfragout.ind(:,2), inds.flip{i}(1,:))=tmass(tfragout.ind(:,2), inds.flip{i}(2,:));
                elseif  size(inds.flip{i},1)==1
                    tmass(tfragout.ind(:,2), :)=tmass(tfragout.ind(:,2), inds.flip{i});
                end
                fracs=mat2cell(mmd.frac{1}(inds.mmdind{i}), accumarray(inds.uind{i}, ones(size(inds.mmdind{i}))), 1);
                tind=any(inds.fragout{i}.ind(:,1:2),2);
                tfracs=cell2mat(fracs(inds.fragout{i}.pmassind(tind))).*repelem(inds.fragdist{i}(inds.fragout{i}.pfracsind(tind)), tsize(inds.fragout{i}.pmassind(tind)), 1);
                clear tind
            end
            % Release per-row index storage that is no longer needed.
            inds.mmdind{i}=[]; inds.uind{i}=[];
            
            % Merge the new linear fragments into the running totals of the 'linear' row.
            fmmd.frac{'linear'}=accumarray(uind, [fmmd.frac{'linear'}; tfracs]);
            
            if any(contains(inds.Properties.VariableNames, 'flip')) && size(inds.flip{i},1)==1
                % Asymmetric descriptor: mirrored products go to the fname row, the rest to the input's row.
                [fmmd.mass{inds.fname{i}}, ~, uind]=unique([fmmd.mass{inds.fname{i}}; tmass(tfragout.ind(:,2),:)], 'rows');
                tmp=tfragout.ind(any(tfragout.ind(:,1:2),2),1:2);
                fmmd.frac{inds.fname{i}}=accumarray(uind, [fmmd.frac{inds.fname{i}}; tfracs(tmp(:,2))]);
                [fmmd.mass{name}, ~, uind]=unique([fmmd.mass{name}; tmass(tfragout.ind(:,1),:)], 'rows');
                fmmd.frac{name}=accumarray(uind, [fmmd.frac{name}; tfracs(tmp(:,1))]);
                clear tmp
                tfragout.ind(:,1)=~any(tfragout.ind(:,1:2),2); tfragout.ind(:,2)=[];
            else
                if size(tfragout.ind,2)>1,  tfragout.ind(:,1)=~any(tfragout.ind(:,1:2),2); tfragout.ind(:,2)=[]; end %fragout.ind(:,1)=breaking between branch points
                [fmmd.mass{name}, ~, uind]=unique([fmmd.mass{name}; tmass(~tfragout.ind(:,1),:)], 'rows');
                fmmd.frac{name}=accumarray(uind, [fmmd.frac{name}; tfracs]);
            end
            clear tfracs uind
            
            % Codes above 10 (102, 203, 1003) have breaks between branch points, which split
            % the chain into two branched products named pname and pnamef.
            if height(inds)>1 && inds.code(i)>10
                tmass(~tfragout.ind(:,1),:)=[]; fracs=cell2mat(fracs(inds.fragout{i}.pmassind));
                fracs=fracs(tfragout.ind(:,1)).*repelem(inds.fragdist{i}(inds.fragout{i}.pfracsind(~any(inds.fragout{i}.ind(:,1:2),2))),...
                    tsize(inds.fragout{i}.pmassind(~any(inds.fragout{i}.ind(:,1:2),2))), 1);
                tfragout=tfragout(tfragout.ind(:,1),:);
                inds.fragout{i}=[]; inds.fragdist{i}=[];
                
                % Two remaining ind columns occur when subfragmmd received four segments (code
                % 1003); the product names are then taken from the nested cell in pname.
                if size(tfragout.ind,2)==2,
                    tfragout.ind(:,1)=[];
                    pname=inds.pname{i}{1}; pnamef=inds.pname{i}{2};
                else pname=inds.pname{i}; pnamef=inds.pnamef{i}; end
                
                % First product: columns before segind(2) plus one column for the remainder.
                [fmmd.mass{pname}, ~, uind]=unique([fmmd.mass{pname}; [tmass(tfragout.ind, 1:inds.segind{i}(2)-1), sum(tmass(tfragout.ind, inds.segind{i}(2:end)),2,...
                    'native')-tfragout.sbind(tfragout.ind)]], 'rows');
                fmmd.frac{pname}=accumarray(uind, [fmmd.frac{pname}; fracs(tfragout.ind)]);
                
                % Second product: the remaining columns in mirrored order plus the remainder column.
                tmp=size(tmass,2); if length(xnum)==2, tmp=arms(1)+2:tmp;
                else tmp=[tmp-arms(3)+1:tmp, tmp-arms(3), arms(1)+2:arms(1)+arms(2)+1, arms(1)+1, 1:arms(1)]; tmp(tmp<=inds.segind{i}(2))=[]; end
                [fmmd.mass{pnamef}, ~, uind]=unique([fmmd.mass{pnamef}; [tmass(tfragout.ind, tmp), tfragout.sbind(tfragout.ind)-...
                    sum(tmass(tfragout.ind, inds.segind{i}(3:end)),2,'native')]], 'rows');
                fmmd.frac{pnamef}=accumarray(uind, [fmmd.frac{pnamef}; fracs(tfragout.ind)]);
                
                % Code 1003: the breaks not handled above are split at segind(3) instead.
                if inds.code(i)>1000,
                    fracs(tfragout.ind)=[]; tmass(tfragout.ind, :)=[]; tfragout=tfragout.sbind(~tfragout.ind);
                    pname=inds.pnamef{i}{1}; pnamef=inds.pnamef{i}{2};
                    [fmmd.mass{pname}, ~, uind]=unique([fmmd.mass{pname}; [tmass(:, 1:inds.segind{i}(3)-1), sum(tmass(:, inds.segind{i}(3:end)),2,'native')-...
                        tfragout]], 'rows');
                    fmmd.frac{pname}=accumarray(uind, [fmmd.frac{pname}; fracs]);
                    
                    tmp=size(tmass,2); tmp=[tmp-arms(3)+1:tmp, tmp-arms(3), arms(1)+2:arms(1)+arms(2)+1, arms(1)+1, 1:arms(1)];
                    tmp(tmp<=inds.segind{i}(3))=[];
                    % NOTE(review): a failure here is swallowed silently and the next line then
                    % reuses uind from the pname update above - confirm this is intended.
                    try [fmmd.mass{pnamef}, ~, uind]=unique([fmmd.mass{pnamef}; [tmass(:, tmp), tfragout-sum(tmass(:, inds.segind{i}(4:end)),2,'native')]], 'rows');
                    catch, end
                    fmmd.frac{pnamef}=accumarray(uind, [fmmd.frac{pnamef}; fracs]);
                end
            end
            inds.fragout{i}=[];
            % After a code-1 row that does not reach the last mass column, the products
            % accumulated so far in row 1 are moved, mirrored, to the fname row of code 102.
            if size(inds,1)>1 && inds.code(i)==1 && inds.segind{i}(end)<size(mmd.mass{1},2),
                fmmd.mass{inds.fname{inds.code==102}}=fmmd.mass{1}(:, inds.flip{inds.code==102});
                fmmd.frac(inds.fname{inds.code==102})=fmmd.frac(1); fmmd.mass{1}=[]; fmmd.frac{1}=[]; end
        end
    end
    
    % NOTE(review): 'mult' is never assigned in this function, so this branch does not execute.
    if exist('mult', 'var'), fmmd.mass=cellfun(@times, fmmd.mass, repmat({mult}, height(fmmd), 1), 'UniformOutput', 0); end
    % Drop product topologies that received no chains.
    fmmd(cellfun('isempty', fmmd.mass),:)=[];
    
    %check for errors
    % The arm counts following the 'p' tokens of every non-linear row name must add up to the
    % number of columns of that row's mass matrix.
    tmp=regexp(fmmd.Properties.RowNames(~contains(fmmd.Properties.RowNames, 'linear')), 'p[0-9]{0,}', 'match');
    ind=repelem((1:size(tmp,1))', cellfun('size', tmp, 2),1);
    tmp=str2double(regexprep(cat(2, tmp{:})', 'p', '')); tmp(isnan(tmp))=1;
    if any(accumarray(ind, tmp)~=cellfun('size', fmmd.mass(~contains(fmmd.Properties.RowNames, 'linear')), 2))
        error('the number of columns specifying at least one product doesnot match its name')
    end
    % NOTE(review): fmmd only ever has the variables 'mass' and 'frac', so the 'fname' test
    % below is always false and this error cannot be raised; 'inds' may have been intended.
    if height(inds)<4 && any(fmmd.mass{1}(:, 1:end-1)==0, 'all') && (any(contains(fmmd.Properties.VariableNames, 'fname')) &&...
            (any(fmmd.mass{inds.fname{~cellfun('isempty', inds.fname)}}(:, 1:end-1)==0, 'all') || any(fmmd.mass{inds.pname{~cellfun('isempty', inds.pname)}}==0, 'all') || ...
            any(fmmd.mass{inds.pnamef{~cellfun('isempty', inds.pnamef)}}==0, 'all'))), error('fragmmd: 0 mass present in wrong columns')
    end
end
end

function [out, fragdist]=subfragmmd(mmd, fragvar)
% SUBFRAGMMD  Enumerate every break position along a path made of consecutive segments.
%   mmd      - matrix with one row per path; each column is the mass of one segment.
%   fragvar  - divisor of the squared relative offset in the exponent of the
%              fragmentation distribution.
%   out      - table with one row per (path, break position):
%                ind       logical flags locating the break (meaning of each column below)
%                sbind     fragment size assigned to the break, in the class of mmd for
%                          uint8/uint16/uint32 input
%                pmassind  row of mmd the break belongs to
%                pfracs    break probability (2-segment paths only)
%                pfracsind linear index into fragdist (paths of 3 or more segments)
%   fragdist - column-normalized break probabilities, one column per unique total
%              mass; returned empty for 2-segment paths.
out=table;
% The appended last column is the total mass of the path.
mmd(:,end+1)=sum(mmd,2);
[utmass, ~, utmassind]=unique(mmd(:,end));
utmass=double(utmass);
% Row i, column j: weight of a break at position i in a path of total mass j+1; only the
% columns that match an occurring total mass are kept, each normalized to unit sum.
fragdist=triu(exp(-(1-(1:utmass(end,1)-1)'./(1:.5:utmass(end,1)/2)).^2/fragvar));
fragdist=fragdist(:,utmass-1)./sum(fragdist(:,utmass-1));

% Linear indices of the nonzero entries of fragdist, grouped per unique total mass and
% then laid out in the row order of mmd.
pfracsind=mat2cell(find(fragdist~=0), utmass-1,1);
pfracsind=cell2mat(pfracsind(utmassind));

% Break positions 1..total-1 for every row, and the row of mmd each one belongs to.
sbind=cellfun(@transpose, cellfun(@colon, repmat({1}, size(utmass)), num2cell(utmass-1), 'UniformOutput', 0), 'UniformOutput', 0);
sbind=cell2mat(sbind(utmassind));
pmassind=repelem((1:size(mmd,1))', mmd(:,end)-1,1);

% ind(:,1): the break position does not exceed the mass of the last segment.
ind=sbind<=mmd(pmassind, end-1);

% Keep sbind in the integer class of mmd so that it can be combined with mmd below.
% (uint32 input used to be cast to uint16, which saturates positions above 65535 and
% makes the uint32-uint16 arithmetic below fail.)
if ismember(class(mmd), {'uint8', 'uint16', 'uint32'}), sbind=cast(sbind, class(mmd)); end

if size(mmd,2)==3
    % Two segments: breaks outside the last segment are re-expressed as total - position.
    sbind(~ind)=mmd(pmassind(~ind),end)-sbind(~ind); %adjust linear product
    % Breaks whose (adjusted) size equals the mass of the last segment are listed twice,
    % each copy carrying half of the probability; the second copy has ind=false.
    dupind=sbind==mmd(pmassind, end-1);
    pfracs=fragdist(pfracsind(:,1)); pfracs(dupind)=pfracs(dupind)/2;
    out=table([ind; false(length(find(dupind)),1)], [sbind; mmd(pmassind(dupind),end)-sbind(dupind)], [pmassind; pmassind(dupind)], [pfracs; pfracs(dupind)],...
        'VariableNames', {'ind', 'sbind', 'pmassind', 'pfracs'});
    fragdist=[];
    
elseif size(mmd,2)>=4
    % ind(:,2): among the remaining breaks, the position is at least the combined mass of
    % all segments but the first; those are re-expressed as total - position.
    ind(~ind,2)=sbind(~ind)>=sum(mmd(pmassind(~ind), 2:end-1),2, 'native');
    sbind(ind(:,2))=mmd(pmassind(ind(:,2)), end)-sbind(ind(:,2));
    if size(mmd,2)>4
        % ind(:,3): size below total - first segment and above the sum of columns 3 and 4.
        ind(:,3)=sbind<(mmd(pmassind,end)-mmd(pmassind,1))&sbind>sum(mmd(pmassind,3:4), 2, 'native');
    end
    out=table(ind, sbind, pmassind, pfracsind, 'VariableNames', {'ind', 'sbind', 'pmassind', 'pfracsind'});
end
end




