import sys import parse def pairs(l): '''pairs(l) Return a list of all pairs of elements from list l pairs([1,2,3] -> [(1, 2), (1, 3), (2, 3)] ''' if len(l)==0: return [] e1=l[0] v=[(e1, e2) for e2 in l[1:]] return v+pairs(l[1:]) def score(p, ci): '''returns a boolean indicating whether the pair was assigned same''' c00=ci.lookup(0,p[0]) c01=ci.lookup(0,p[1]) c10=ci.lookup(1,p[0]) c11=ci.lookup(1,p[1]) if (c00==c01 and c10==c11) or (c00!=c01 and c10!=c11): return True return False def randindex(ci): correct=0 sample_pairs=pairs(ci.getSamples()) for p in sample_pairs: if score(p, ci): correct+=1 return float(correct)/len(sample_pairs) if __name__=='__main__': clfile=sys.argv[1] bootstraps=int(sys.argv[2]) hasheaders=False ci=parse.ClusterInfo() ci.parseFile(clfile, hasheaders) v=randindex(ci) cnt=0 for i in xrange(bootstraps): cip=ci.permute() vp=randindex(cip) if vp>v: cnt+=1 print v, float(cnt)/bootstraps