# -*- coding: utf-8 -*-
"""
Score a set of enzyme protein sequences against the records of ``ab.fasta``.

Each enzyme declared in this file is locally aligned (BLOSUM62) against every
sequence read from ``ab.fasta``; the scores are written to one
``Scores_enz<i>.csv`` file per enzyme.
"""

#libraries used
import csv
import operator

from Bio import Align
from Bio import SeqIO
# IUPAC is referenced by every Seq(...) declaration in this file; without this
# import the first declaration raises NameError.
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
from Bio.SubsMat.MatrixInfo import blosum62

# Single aligner instance shared by the whole script.
aligner = Align.PairwiseAligner()

#Declaration of enzyme sequences to search for
# NOTE(review): IUPAC.IUPACProtein is the alphabet class; Biopython also
# exposes a ready-made instance as IUPAC.protein -- left unchanged here so
# that all declarations in the file stay consistent; confirm which is intended.
Q84UC0 = Seq(
    "MGGGEGIEVRSGSSSTKLAFGERITHAKPPFSISQIKKAIPPHCFQRSLYRSFSYVIFDFIFASTFYHIAATNFHRLPHPLHYLAWPLYWFCQGSVFTGLWVIAHECGHRAFSDYQLVDDVVGFLLHTSFLIPYFSFKISHRRHHSNTASLERDEVFVPKPKAKMPWYFKHLTNPPARVLIIFITLTLGWPMYLAFNISGRFYERFTSHFDPNSPIFSENEWLQVHISNAGIVAVWYLLYKLAAAKGIAWVIRMYVVPVTIMNAFVVLITSLQHTHPSFPYYDSTEWNWLRGNLVTLDRDYGILNKVFHNITDTHVVHHLFPSMPHYNAMEATRAVKQVLGEYYHFDGTPIFKAAWREFRECIYVEPDNDEGASSSSKGVFWFRNKL",
    alphabet=IUPAC.IUPACProtein,
)
Q84UB8 = Seq(
    "MGADGTMSPVLTKRRPDQEINKLDIKPNHEVDIARRAPHSKPPFTLSDLRSAIPPHCFHRSLLMSSSYLIRDFALAFLFYHSAVTYIPLLPKPLACMAWPVYWFLQGSNMLGIWVIAHECGHQAFSNYGWVNDAVGFFLHTSLLVPYFPFKYSHRRHHSNTNSVEHDEVFVPRHKDGVQWYYRFFNNTPGRVLTLTLTLLVGWPSYLAFNASGRPYDGFASHYNPNAQIFNLRERFWVHVSNIGILAIYYILYRLATTKGLPWLLSIYGVPVLILNAFVVLITFLQHSHPALPHYNSDEWDWLRGALATVDRDYGFLNEVFHDITDTHVIHHLFPTMPHYNAKEATVSIRPILKDYYKFDRTPIWRALWREAKECLYVEADGTGSKGVLWFKSKF",
    alphabet=IUPAC.IUPACProtein,
)
Q9FPP7 = Seq(
    "MGKAASAKKVLERVPISKPPFEYNDLKKAVPPHCFSRPLSRSLYFLFHDIIVTCILFYVASNYIHMLPRFLSCIVWPVYWISQGVFLGRLWMIGHECGHHSFSNYRWVDDTVGFLIHTATLTPYFSFKYSHRNHHAHTNSMEYDEVHIPKRKSEALYFEFLGNNPIGLMITMLCKLTFGYAAYIMFNYTGKKHKSGGLASHFYPQSPLFNDSERNHVLFSDIGICIVLYACYRIVTVTGAMPAFYVYGIPWVIMSAILFAATYLQHTHPSIPHYDTTEWNWLRGALSTIDRDLGFFNMNKTHYHVIHHLFPVIPEYHAQEATEAIKPILGQYYKYDGTPFLKALWREMKECIYVESDEGQKKQGIYWFKNKT",
    alphabet=IUPAC.IUPACProtein,
)
Q9FPP8 = Seq(
    "MGKGASNKKVLERVPITKPPFEYNDLKKAVPPHCFSRPLFRSFYFLLHDIIVTCILFYVASNYIPMLPGFLSYIVWPVYWISQGVFLGRLWMIGHECGHHSFSNYRWVDDSVGFLIHTATLTPYFSFKYSHRNHHAHTNSMEYDEVHIPKRKSEALDLYFEFLGNNPMGLMITMLCKLTFGYAAYIMFNYTGKKHKSGGLASHFYPQSPLFNDSERNHVLFSDVGICIVLYACYRIVMVTGAMSAFYVYGIPWVIMSAILFAATYLQHTHPSIPHYDTTEWNWLRGALSTIDRDLGFFNMNKTHYHVIHHLFPVIPEYHAQEATEAIKPILGQYYKYDGTPFLKALWREMKDCIYVESDQGQKKQGIYWFKNKI",
    alphabet=IUPAC.IUPACProtein,
)
# Enzyme query sequences (continued); each one is declared as a Seq carrying
# the IUPAC protein alphabet, one argument per line for readability.
Q8GZC2 = Seq(
    "MGAGGRMSVAPNNSKCEKKESRSVKRVPHTKPPFTLGQLKQAIPSHCFKRSLLRSFSYVVYDLSLSFIFYSIATTYFHLLPSPITYIAWPVYWAFQGCILTSVWVLGHECGHHAFSEYNWLDDTIGLILHSSLLVPYFSFKISHRRHHSNIASLERDEVFVPRLKSAIPWYSKYLNNPPGRALTLVATLFIGWPLYLAFNVSGRYYDRFACHYDPYSPIYSDRERLQIYISDAMIFVAAYVLYKIAMAKGLAWLVCIYGVPLLIVNALVVTITSLQHTHVALPHYDSSEWDWLRGGLATVDRDYGVFNKIFHNATDTHVIHHLFSSMPHYHGVEATRAIKPILGDYYLFDDTPIHVALWREAKECLFVEPDEGDNNNGVFWYSNKF",
    alphabet=IUPAC.IUPACProtein,
)
U5LN76 = Seq(
    "MGAGGNLPSAHRRAPHSKPPFTLSHVRKAIPPHCFRRSLFRSFSYVFADLAVILSLSYAAANYFHLLPAPLQYLTWPALWLVQGFFMVGYWVLAHECGHHAFSDYPVLNDVVGFLIHSSLLVPYFSWKISHRIHHANANVLERDESFVPALKSNIPWYNRYFNNPPGRALLLLASALSGWPLYLLCIITGRSYDRWACHFDPYSPMYTERERFLIYLSDAGVLAAVYGLYRLTLVNGPEWLLLYYAAPLLVVHATIVVIIYLHHTHPSLPRYDSSEWDWLRGALATVDRDYGILNIVFHHIADTHVLHHLLPSVPHYHAAEATKAIKPVLGEYYQFDGTPVLKGLWREVKECVYVEPAAGDEAGGPQAKGIFWFNNKGL",
    alphabet=IUPAC.IUPACProtein,
)
Q9SP61 = Seq(
    "MGGRGAIGVLRNGGGPKKKMGPGQGLGPGERITHARPPFSISQIKKAIPPHCFQRSLRRSFSYLLSDIALVSAFYYVADTYFHRLPHPLLHYLAWPVYWFCQGAVLTGMWGIAHDCGHHAFSDYQLVDDVVGFLIHSLVFVPYFSFKISHRRHHSNTSSVDRDEVFVPKPKAKMPWYFKYLTNPPARVFIIFITLTLGWPMYLTFNISGRYYGRFTSHFDPNSPIFSPKERVLVHISNAGLVATGYLLYRIAMAKGVGWLIRLYGVPLIVLNACVVLITALQHTHPSFPYYDSTEWDWLRGNLVTVDRDYGPIMNRVFHHITDTHVVHHLFPSMPHYNGKEATVAAKRILGEYYQFDGTPIWKAAWREFRECVYVEPDEDDGATSGSSSKGVFWYHNKL",
    alphabet=IUPAC.IUPACProtein,
)
Q9SP62 = Seq(
    "MGEVGPTNRTKTKLDKQQESENRVPHEPPPFTLSDLKKAIPPHCFERSLVKSFYHVIHDIIILSFFYYVAANYIPMLPQNLRYVAWPIYWAIQGCVQLGILVLGHECGHHAFSDYQWVDDMVGFVLHSSQLIPYFSWKHSHRRHHSNTASIERDEVYPPAYKNDLPWFAKYLRNPVGRFLMIFGALLFGWPSYLLFNANGRLYDRFASHYDPQSPIFNNRERLQVIASDVGLVFAYFVLYKIALAKGFVWLICVYGVPYVILNGLIVLITFLQHTHPNLPRYDLSEWDWLRGALSTVDRDYGMLNKVFHNVTDTHLVHHLFTTMPHYRAKEATEVIKPILGDYYKFDDTPFLKALWKDMGKCIYVESDVPGKNKGVYWYNNDI",
    alphabet=IUPAC.IUPACProtein,
)
Q9SCG2 = Seq(
    "MGAGGRMSDPSEGKNILERVPVDPPFTLSDLKKAIPTHCFERSVIRSSYYVVHDLIVAYVFYYLANTYIPLIPTPLAYLAWPVYWFCQASILTGLWVIGHECGHHAFSDYQLIDDIVGFVLHSALLTPYFSWKYSHRNHHANTNSLDNDEVYIPKRKSKVKIYSKLLNNPPGRVFTLVFRLTLGFPLYLLTNISGKKYGRFANHFDPMSPIFNDRERVQVLLSDFGLLAVFYAIKLLVAAKGAAWVINMYAIPVLGVSVFFVLITYLHHTHLSLPHYDSTEWNWIKGALSTIDRDFGFLNRVFHDVTHTHVLHHLISYIPHYHAKEARDAIKPVLGEYYKIDRTPIFKAMYREAKECIYIEPDEDSEHKGVFWYHKM",
    alphabet=IUPAC.IUPACProtein,
)

# Names of the enzymes to process; the position in this list is the index
# used in the output file name (Scores_enz<index>.csv).
enzymeList = ["Q84UC0", "Q84UB8", "Q9FPP7", "Q9FPP8", "Q8GZC2", "U5LN76",
              "Q9SP61", "Q9SP62", "Q9SCG2"]

# Name -> sequence lookup, so that the alignment is run on the residues and
# not on the identifier text itself.
enzyme_seqs = {
    "Q84UC0": Q84UC0,
    "Q84UB8": Q84UB8,
    "Q9FPP7": Q9FPP7,
    "Q9FPP8": Q9FPP8,
    "Q8GZC2": Q8GZC2,
    "U5LN76": U5LN76,
    "Q9SP61": Q9SP61,
    "Q9SP62": Q9SP62,
    "Q9SCG2": Q9SCG2,
}

#extracting sequences from fasta into a dictionary: Name->Sequence
# str() yields the bare residue string; repr() would store the "Seq(...)"
# representation text, which is not a sequence.
ab_dict = {}
for seq_record in SeqIO.parse("ab.fasta", "fasta"):
    ab_dict[str(seq_record.id)] = str(seq_record.seq)

#Local alignment using blosum62
aligner.mode = 'local'
aligner.open_gap_score = -10
aligner.extend_gap_score = -0.5
aligner.substitution_matrix = blosum62

for i, enzyme in enumerate(enzymeList):
    # Plain-string form of the query sequence for this enzyme.
    query = str(enzyme_seqs[enzyme])

    # Name -> best local alignment score of the query against that record.
    # Only scores are computed; if the alignments themselves are needed,
    # aligner.align(query, target) returns them (index [0] for the first).
    score_dict = {}
    for name, target in ab_dict.items():
        score_dict[name] = aligner.score(query, target)

    #write the results in a csv file, highest score first
    sorted_score = sorted(score_dict.items(), key=operator.itemgetter(1),
                          reverse=True)

    # newline="" lets the csv module control line endings (avoids blank rows
    # on platforms that translate "\n"); the with-block closes the file.
    with open("Scores_enz" + str(i) + '.csv', 'w', newline="") as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(sorted_score)