# your code goes here
# your code goes here
# DNA sequences at the leaves (tips of the tree)
sequences = {
"A":"GGCTCTGTCAGCGATGTCATAAGCGGAAACGTTCATGCAAGGTTCGGCTAGAGTGTCGAG",
"B":"GGCCAAGTCAGCACCGTAATAAGCGGCGACAGTCATCGATTGTTCGGCTACAGGGTCCAG",
"C":"GGTCAACCCAGCAATGTCATAATAGGGGATATTCACAGGTTGATCGGCTACATTATCAGG",
"D":"GGCAAAGACAGTAATGTCATAAGCGGCGATATTCCGGGATGGTGCGGCGTGAGTGTCGAC",
"E":"GGCGATGTCAGCAATCTCTTAAGCGGCGACATTCATGGTTTCGTCGGCAATAGTGAAGAT",
"F":"GGTTATGACATTAATGTCGTAAACGGCGACATTCACTAATTGATCGGCTAGAGAGTCAAG",
"G":"GGCTGCATCCGCAATGTCATAAGAGCCTACTTCCATGGGTCGTTCGACTAGATTATCGAG",
"H":"GGCTATCGCAGTCAGGTCGTAACCGAGTAGCTTCTTCGATGGTTGGTCTATAGCGTCTAG",
"I":"GGCTATGTCTGCAAGGTCAAAAACGACTAGATGCATGTATTATTGGACTAGACCATCGTG"
}
length = 60 # length of each DNA sequence
# Class to represent a node in the tree
class Node:
def __init__(self,name=None,left=None,right=None):
self.name=name # tip label if this is a leaf (A..I), otherwise None
self.left=left # left child
self.right=right # right child
self.sets=None # list of sets for each site after bottom-up Fitch pass
self.assignment=None # final base assignment after top-down pass
# Build the tree structure ((A,(B,(C,D))),(E,(F,(G,(H,I)))))
A=Node("A")
B=Node("B")
C=Node("C")
D=Node("D")
E=Node("E")
F=Node("F")
G=Node("G")
H=Node("H")
I=Node("I")
node_cd=Node(None,C,D) # internal node joining C and D
node_b=Node(None,B,node_cd) # internal node joining B with (C,D)
node_ab=Node(None,A,node_b) # internal node joining A with (B,(C,D))
node_hi=Node(None,H,I) # internal node joining H and I
node_ghi=Node(None,G,node_hi) # internal node joining G with (H,I)
node_fghi=Node(None,F,node_ghi)# internal node joining F with (G,(H,I))
node_efghi=Node(None,E,node_fghi) # internal node joining E with (F,(G,(H,I)))
root=Node(None,node_ab,node_efghi) # root of the whole tree
# Fitch bottom-up pass
def fitch_postorder(node):
if node.name: # leaf node
# Each site is a singleton set containing its observed base
s=[set([sequences[node.name][i]]) for i in range(length)]
node.sets=s
return s,0 # no substitutions at leaves
# internal node: process children
left_sets,lscore=fitch_postorder(node.left)
right_sets,rscore=fitch_postorder(node.right)
total=lscore+rscore
s=[]
for i in range(length):
inter=left_sets[i].intersection(right_sets[i])
if inter:
# if children share a base, parent set = intersection, no substitution added
s.append(set(inter))
else:
# if disjoint, parent set = union, and add 1 substitution
s.append(left_sets[i].union(right_sets[i]))
total+=1
node.sets=s
return s,total
# Fitch top-down assignment
def fitch_preorder(node,parent_assignment=None):
if parent_assignment is None:
# At the root: if set has one element, take it; if multiple, pick arbitrarily
assignment=[]
for i in range(length):
if len(node.sets[i])==1:
assignment.append(next(iter(node.sets[i])))
else:
assignment.append(sorted(node.sets[i])[0]) # pick one deterministically
node.assignment="".join(assignment)
else:
# At other nodes: try to match parent's assignment if possible
assignment=[]
for i in range(length):
if parent_assignment[i] in node.sets[i]:
assignment.append(parent_assignment[i])
else:
assignment.append(sorted(node.sets[i])[0])
node.assignment="".join(assignment)
# recurse to children
if node.left:
fitch_preorder(node.left,node.assignment)
if node.right:
fitch_preorder(node.right,node.assignment)
# Run Fitch algorithm
post_sets,score=fitch_postorder(root) # bottom-up: sets + parsimony score
fitch_preorder(root) # top-down: assign actual sequence
# Extract results
root_sequence=root.assignment
print(root_sequence) # most parsimonious root sequence
print(score) # total parsimony score
IyB5b3VyIGNvZGUgZ29lcyBoZXJlCiMgeW91ciBjb2RlIGdvZXMgaGVyZQojIEROQSBzZXF1ZW5jZXMgYXQgdGhlIGxlYXZlcyAodGlwcyBvZiB0aGUgdHJlZSkKc2VxdWVuY2VzID0gewoiQSI6IkdHQ1RDVEdUQ0FHQ0dBVEdUQ0FUQUFHQ0dHQUFBQ0dUVENBVEdDQUFHR1RUQ0dHQ1RBR0FHVEdUQ0dBRyIsCiJCIjoiR0dDQ0FBR1RDQUdDQUNDR1RBQVRBQUdDR0dDR0FDQUdUQ0FUQ0dBVFRHVFRDR0dDVEFDQUdHR1RDQ0FHIiwKIkMiOiJHR1RDQUFDQ0NBR0NBQVRHVENBVEFBVEFHR0dHQVRBVFRDQUNBR0dUVEdBVENHR0NUQUNBVFRBVENBR0ciLAoiRCI6IkdHQ0FBQUdBQ0FHVEFBVEdUQ0FUQUFHQ0dHQ0dBVEFUVENDR0dHQVRHR1RHQ0dHQ0dUR0FHVEdUQ0dBQyIsCiJFIjoiR0dDR0FUR1RDQUdDQUFUQ1RDVFRBQUdDR0dDR0FDQVRUQ0FUR0dUVFRDR1RDR0dDQUFUQUdUR0FBR0FUIiwKIkYiOiJHR1RUQVRHQUNBVFRBQVRHVENHVEFBQUNHR0NHQUNBVFRDQUNUQUFUVEdBVENHR0NUQUdBR0FHVENBQUciLAoiRyI6IkdHQ1RHQ0FUQ0NHQ0FBVEdUQ0FUQUFHQUdDQ1RBQ1RUQ0NBVEdHR1RDR1RUQ0dBQ1RBR0FUVEFUQ0dBRyIsCiJIIjoiR0dDVEFUQ0dDQUdUQ0FHR1RDR1RBQUNDR0FHVEFHQ1RUQ1RUQ0dBVEdHVFRHR1RDVEFUQUdDR1RDVEFHIiwKIkkiOiJHR0NUQVRHVENUR0NBQUdHVENBQUFBQUNHQUNUQUdBVEdDQVRHVEFUVEFUVEdHQUNUQUdBQ0NBVENHVEciCn0KCmxlbmd0aCA9IDYwICAjIGxlbmd0aCBvZiBlYWNoIEROQSBzZXF1ZW5jZQoKIyBDbGFzcyB0byByZXByZXNlbnQgYSBub2RlIGluIHRoZSB0cmVlCmNsYXNzIE5vZGU6CiAgICBkZWYgX19pbml0X18oc2VsZixuYW1lPU5vbmUsbGVmdD1Ob25lLHJpZ2h0PU5vbmUpOgogICAgICAgIHNlbGYubmFtZT1uYW1lICAgICAgICAgICMgdGlwIGxhYmVsIGlmIHRoaXMgaXMgYSBsZWFmIChBLi5JKSwgb3RoZXJ3aXNlIE5vbmUKICAgICAgICBzZWxmLmxlZnQ9bGVmdCAgICAgICAgICAjIGxlZnQgY2hpbGQKICAgICAgICBzZWxmLnJpZ2h0PXJpZ2h0ICAgICAgICAjIHJpZ2h0IGNoaWxkCiAgICAgICAgc2VsZi5zZXRzPU5vbmUgICAgICAgICAgIyBsaXN0IG9mIHNldHMgZm9yIGVhY2ggc2l0ZSBhZnRlciBib3R0b20tdXAgRml0Y2ggcGFzcwogICAgICAgIHNlbGYuYXNzaWdubWVudD1Ob25lICAgICMgZmluYWwgYmFzZSBhc3NpZ25tZW50IGFmdGVyIHRvcC1kb3duIHBhc3MKCiMgQnVpbGQgdGhlIHRyZWUgc3RydWN0dXJlICgoQSwoQiwoQyxEKSkpLChFLChGLChHLChILEkpKSkpKQpBPU5vZGUoIkEiKQpCPU5vZGUoIkIiKQpDPU5vZGUoIkMiKQpEPU5vZGUoIkQiKQpFPU5vZGUoIkUiKQpGPU5vZGUoIkYiKQpHPU5vZGUoIkciKQpIPU5vZGUoIkgiKQpJPU5vZGUoIkkiKQoKbm9kZV9jZD1Ob2RlKE5vbmUsQyxEKSAgICAgICAgICMgaW50ZXJuYWwgbm9kZSBqb2luaW5nIEMgYW5kIEQKbm9kZV9iPU5vZGUoTm9uZSxCLG5vZGVfY2QpICAgICMgaW50ZXJuYWwgbm9kZSBqb2luaW5nIEIgd2l0aCAoQyxEKQpub2RlX2FiPU5vZGUoTm9uZSxBLG5vZGVfYikgICAgIyBpbnRlcm5hbCBub2RlIGpvaW5pbmcgQSB3aXRoIChCLChDLEQpKQoKbm9kZV9oaT1Ob2RlKE5vbmUsSCxJKSAgICAgICAgICMgaW50ZXJuYWwgbm9kZSBqb2luaW5nIEggYW5kIEkKbm9kZV9naGk9Tm9kZShOb25lLEcsbm9kZV9oaSkgICMgaW50ZXJuYWwgbm9kZSBqb2luaW5nIEcgd2l0aCAoSCxJKQpub2RlX2ZnaGk9Tm9kZShOb25lLEYsbm9kZV9naGkpIyBpbnRlcm5hbCBub2RlIGpvaW5pbmcgRiB3aXRoIChHLChILEkpKQpub2RlX2VmZ2hpPU5vZGUoTm9uZSxFLG5vZGVfZmdoaSkgIyBpbnRlcm5hbCBub2RlIGpvaW5pbmcgRSB3aXRoIChGLChHLChILEkpKSkKCnJvb3Q9Tm9kZShOb25lLG5vZGVfYWIsbm9kZV9lZmdoaSkgIyByb290IG9mIHRoZSB3aG9sZSB0cmVlCgojIEZpdGNoIGJvdHRvbS11cCBwYXNzCmRlZiBmaXRjaF9wb3N0b3JkZXIobm9kZSk6CiAgICBpZiBub2RlLm5hbWU6ICAjIGxlYWYgbm9kZQogICAgICAgICMgRWFjaCBzaXRlIGlzIGEgc2luZ2xldG9uIHNldCBjb250YWluaW5nIGl0cyBvYnNlcnZlZCBiYXNlCiAgICAgICAgcz1bc2V0KFtzZXF1ZW5jZXNbbm9kZS5uYW1lXVtpXV0pIGZvciBpIGluIHJhbmdlKGxlbmd0aCldCiAgICAgICAgbm9kZS5zZXRzPXMKICAgICAgICByZXR1cm4gcywwICAjIG5vIHN1YnN0aXR1dGlvbnMgYXQgbGVhdmVzCiAgICAjIGludGVybmFsIG5vZGU6IHByb2Nlc3MgY2hpbGRyZW4KICAgIGxlZnRfc2V0cyxsc2NvcmU9Zml0Y2hfcG9zdG9yZGVyKG5vZGUubGVmdCkKICAgIHJpZ2h0X3NldHMscnNjb3JlPWZpdGNoX3Bvc3RvcmRlcihub2RlLnJpZ2h0KQogICAgdG90YWw9bHNjb3JlK3JzY29yZQogICAgcz1bXQogICAgZm9yIGkgaW4gcmFuZ2UobGVuZ3RoKToKICAgICAgICBpbnRlcj1sZWZ0X3NldHNbaV0uaW50ZXJzZWN0aW9uKHJpZ2h0X3NldHNbaV0pCiAgICAgICAgaWYgaW50ZXI6CiAgICAgICAgICAgICMgaWYgY2hpbGRyZW4gc2hhcmUgYSBiYXNlLCBwYXJlbnQgc2V0ID0gaW50ZXJzZWN0aW9uLCBubyBzdWJzdGl0dXRpb24gYWRkZWQKICAgICAgICAgICAgcy5hcHBlbmQoc2V0KGludGVyKSkKICAgICAgICBlbHNlOgogICAgICAgICAgICAjIGlmIGRpc2pvaW50LCBwYXJlbnQgc2V0ID0gdW5pb24sIGFuZCBhZGQgMSBzdWJzdGl0dXRpb24KICAgICAgICAgICAgcy5hcHBlbmQobGVmdF9zZXRzW2ldLnVuaW9uKHJpZ2h0X3NldHNbaV0pKQogICAgICAgICAgICB0b3RhbCs9MQogICAgbm9kZS5zZXRzPXMKICAgIHJldHVybiBzLHRvdGFsCgojIEZpdGNoIHRvcC1kb3duIGFzc2lnbm1lbnQKZGVmIGZpdGNoX3ByZW9yZGVyKG5vZGUscGFyZW50X2Fzc2lnbm1lbnQ9Tm9uZSk6CiAgICBpZiBwYXJlbnRfYXNzaWdubWVudCBpcyBOb25lOgogICAgICAgICMgQXQgdGhlIHJvb3Q6IGlmIHNldCBoYXMgb25lIGVsZW1lbnQsIHRha2UgaXQ7IGlmIG11bHRpcGxlLCBwaWNrIGFyYml0cmFyaWx5CiAgICAgICAgYXNzaWdubWVudD1bXQogICAgICAgIGZvciBpIGluIHJhbmdlKGxlbmd0aCk6CiAgICAgICAgICAgIGlmIGxlbihub2RlLnNldHNbaV0pPT0xOgogICAgICAgICAgICAgICAgYXNzaWdubWVudC5hcHBlbmQobmV4dChpdGVyKG5vZGUuc2V0c1tpXSkpKQogICAgICAgICAgICBlbHNlOgogICAgICAgICAgICAgICAgYXNzaWdubWVudC5hcHBlbmQoc29ydGVkKG5vZGUuc2V0c1tpXSlbMF0pICMgcGljayBvbmUgZGV0ZXJtaW5pc3RpY2FsbHkKICAgICAgICBub2RlLmFzc2lnbm1lbnQ9IiIuam9pbihhc3NpZ25tZW50KQogICAgZWxzZToKICAgICAgICAjIEF0IG90aGVyIG5vZGVzOiB0cnkgdG8gbWF0Y2ggcGFyZW50J3MgYXNzaWdubWVudCBpZiBwb3NzaWJsZQogICAgICAgIGFzc2lnbm1lbnQ9W10KICAgICAgICBmb3IgaSBpbiByYW5nZShsZW5ndGgpOgogICAgICAgICAgICBpZiBwYXJlbnRfYXNzaWdubWVudFtpXSBpbiBub2RlLnNldHNbaV06CiAgICAgICAgICAgICAgICBhc3NpZ25tZW50LmFwcGVuZChwYXJlbnRfYXNzaWdubWVudFtpXSkKICAgICAgICAgICAgZWxzZToKICAgICAgICAgICAgICAgIGFzc2lnbm1lbnQuYXBwZW5kKHNvcnRlZChub2RlLnNldHNbaV0pWzBdKQogICAgICAgIG5vZGUuYXNzaWdubWVudD0iIi5qb2luKGFzc2lnbm1lbnQpCiAgICAjIHJlY3Vyc2UgdG8gY2hpbGRyZW4KICAgIGlmIG5vZGUubGVmdDoKICAgICAgICBmaXRjaF9wcmVvcmRlcihub2RlLmxlZnQsbm9kZS5hc3NpZ25tZW50KQogICAgaWYgbm9kZS5yaWdodDoKICAgICAgICBmaXRjaF9wcmVvcmRlcihub2RlLnJpZ2h0LG5vZGUuYXNzaWdubWVudCkKCiMgUnVuIEZpdGNoIGFsZ29yaXRobQpwb3N0X3NldHMsc2NvcmU9Zml0Y2hfcG9zdG9yZGVyKHJvb3QpICMgYm90dG9tLXVwOiBzZXRzICsgcGFyc2ltb255IHNjb3JlCmZpdGNoX3ByZW9yZGVyKHJvb3QpICAgICAgICAgICAgICAgICAgIyB0b3AtZG93bjogYXNzaWduIGFjdHVhbCBzZXF1ZW5jZQoKIyBFeHRyYWN0IHJlc3VsdHMKcm9vdF9zZXF1ZW5jZT1yb290LmFzc2lnbm1lbnQKcHJpbnQocm9vdF9zZXF1ZW5jZSkgICMgbW9zdCBwYXJzaW1vbmlvdXMgcm9vdCBzZXF1ZW5jZQpwcmludChzY29yZSkgICAgICAgICAgIyB0b3RhbCBwYXJzaW1vbnkgc2NvcmUK