fork download
  1. # your code goes here
  2. # your code goes here
  3. # DNA sequences at the leaves (tips of the tree)
  4. sequences = {
  5. "A":"GGCTCTGTCAGCGATGTCATAAGCGGAAACGTTCATGCAAGGTTCGGCTAGAGTGTCGAG",
  6. "B":"GGCCAAGTCAGCACCGTAATAAGCGGCGACAGTCATCGATTGTTCGGCTACAGGGTCCAG",
  7. "C":"GGTCAACCCAGCAATGTCATAATAGGGGATATTCACAGGTTGATCGGCTACATTATCAGG",
  8. "D":"GGCAAAGACAGTAATGTCATAAGCGGCGATATTCCGGGATGGTGCGGCGTGAGTGTCGAC",
  9. "E":"GGCGATGTCAGCAATCTCTTAAGCGGCGACATTCATGGTTTCGTCGGCAATAGTGAAGAT",
  10. "F":"GGTTATGACATTAATGTCGTAAACGGCGACATTCACTAATTGATCGGCTAGAGAGTCAAG",
  11. "G":"GGCTGCATCCGCAATGTCATAAGAGCCTACTTCCATGGGTCGTTCGACTAGATTATCGAG",
  12. "H":"GGCTATCGCAGTCAGGTCGTAACCGAGTAGCTTCTTCGATGGTTGGTCTATAGCGTCTAG",
  13. "I":"GGCTATGTCTGCAAGGTCAAAAACGACTAGATGCATGTATTATTGGACTAGACCATCGTG"
  14. }
  15.  
  16. length = 60 # length of each DNA sequence
  17.  
  18. # Class to represent a node in the tree
  19. class Node:
  20. def __init__(self,name=None,left=None,right=None):
  21. self.name=name # tip label if this is a leaf (A..I), otherwise None
  22. self.left=left # left child
  23. self.right=right # right child
  24. self.sets=None # list of sets for each site after bottom-up Fitch pass
  25. self.assignment=None # final base assignment after top-down pass
  26.  
  27. # Build the tree structure ((A,(B,(C,D))),(E,(F,(G,(H,I)))))
  28. A=Node("A")
  29. B=Node("B")
  30. C=Node("C")
  31. D=Node("D")
  32. E=Node("E")
  33. F=Node("F")
  34. G=Node("G")
  35. H=Node("H")
  36. I=Node("I")
  37.  
  38. node_cd=Node(None,C,D) # internal node joining C and D
  39. node_b=Node(None,B,node_cd) # internal node joining B with (C,D)
  40. node_ab=Node(None,A,node_b) # internal node joining A with (B,(C,D))
  41.  
  42. node_hi=Node(None,H,I) # internal node joining H and I
  43. node_ghi=Node(None,G,node_hi) # internal node joining G with (H,I)
  44. node_fghi=Node(None,F,node_ghi)# internal node joining F with (G,(H,I))
  45. node_efghi=Node(None,E,node_fghi) # internal node joining E with (F,(G,(H,I)))
  46.  
  47. root=Node(None,node_ab,node_efghi) # root of the whole tree
  48.  
  49. # Fitch bottom-up pass
  50. def fitch_postorder(node):
  51. if node.name: # leaf node
  52. # Each site is a singleton set containing its observed base
  53. s=[set([sequences[node.name][i]]) for i in range(length)]
  54. node.sets=s
  55. return s,0 # no substitutions at leaves
  56. # internal node: process children
  57. left_sets,lscore=fitch_postorder(node.left)
  58. right_sets,rscore=fitch_postorder(node.right)
  59. total=lscore+rscore
  60. s=[]
  61. for i in range(length):
  62. inter=left_sets[i].intersection(right_sets[i])
  63. if inter:
  64. # if children share a base, parent set = intersection, no substitution added
  65. s.append(set(inter))
  66. else:
  67. # if disjoint, parent set = union, and add 1 substitution
  68. s.append(left_sets[i].union(right_sets[i]))
  69. total+=1
  70. node.sets=s
  71. return s,total
  72.  
  73. # Fitch top-down assignment
  74. def fitch_preorder(node,parent_assignment=None):
  75. if parent_assignment is None:
  76. # At the root: if set has one element, take it; if multiple, pick arbitrarily
  77. assignment=[]
  78. for i in range(length):
  79. if len(node.sets[i])==1:
  80. assignment.append(next(iter(node.sets[i])))
  81. else:
  82. assignment.append(sorted(node.sets[i])[0]) # pick one deterministically
  83. node.assignment="".join(assignment)
  84. else:
  85. # At other nodes: try to match parent's assignment if possible
  86. assignment=[]
  87. for i in range(length):
  88. if parent_assignment[i] in node.sets[i]:
  89. assignment.append(parent_assignment[i])
  90. else:
  91. assignment.append(sorted(node.sets[i])[0])
  92. node.assignment="".join(assignment)
  93. # recurse to children
  94. if node.left:
  95. fitch_preorder(node.left,node.assignment)
  96. if node.right:
  97. fitch_preorder(node.right,node.assignment)
  98.  
  99. # Run Fitch algorithm
  100. post_sets,score=fitch_postorder(root) # bottom-up: sets + parsimony score
  101. fitch_preorder(root) # top-down: assign actual sequence
  102.  
  103. # Extract results
  104. root_sequence=root.assignment
  105. print(root_sequence) # most parsimonious root sequence
  106. print(score) # total parsimony score
  107.  
Success #stdin #stdout 0.07s 14136KB
stdin
Standard input is empty
stdout
GGCTATGTCAGCAATGTCATAAGCGGCGACATTCATGGATTGTTCGGCTAGAGTGTCGAG
109