#!/usr/bin/env python

from Bio import Translate
from Bio import Seq
from Bio.Alphabet import IUPAC
from Bio import Restriction
from Bio.Data import CodonTable

def backtrans(ori_pep,TableID=1):
    # Function to make backtranslation (from peptide to DNA)
    # This function needs the peptide sequence and the code of
    # translation table. Code number is the same as posted in:
    # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
    def recurs(order, pos):
        for letter in bt[order[pos]]:
            if pos == len(order) - 1:
                yield letter
                continue
            for prox in recurs(order, pos+1):
                yield (letter+prox)
    def combine(order):
        ordened = set()
        for frase in recurs(order, 0):
            ordened.add(frase)
        return ordened
    t = CodonTable.generic_by_id[TableID]
    bt = dict()
    for a1 in "ATCG" :
        for a2 in "ATCG" :
            for a3 in "ATCG" :
                codon = a1+a2+a3
                try :
                    amino = t.forward_table[codon]
                except KeyError :
                    assert codon in t.stop_codons
                    continue
                try :
                    bt[amino].append(codon)
                except KeyError :
                    bt[amino] = [codon]
    return list(combine(ori_pep))

def seqcomp(s1,s2):
    # Compares 2 sequences and returns a value with
    # how many differents elements they have. 
    p = len(s1)
    for x,y in zip(s1,s2): # Walk through 2 sequences.
        if x==y:
            p -= 1
    return p

n_mut = 1  # Number or allowed mutations.
trans = Translate.unambiguous_dna_by_id[1]
builtin_seq = "ATGggtaaTtgcaacggggCATCCAAG".upper()
dna = Seq.Seq(builtin_seq, IUPAC.unambiguous_dna)
# Translate DNA sequence.
ori_pep = str(trans.translate(dna))
# Get all backtranslations.
bakpeps = backtrans(ori_pep)
print 'builtin_seq: %s\nPeptide: %s\n' %(builtin_seq,ori_pep)
print "ORIGINAL SEQUENCE:"
# Make a restriction analysis for the orignal sequence.
anal = Restriction.Analysis(Restriction.CommOnly, dna)
anal.print_as("map")
anal.print_that()
# Store the enzymes that cut in the original sequence.
enzORI = anal.with_sites().keys()
enzORIset = set(enzORI)
# Get a string out of the enzyme list, only for 
# printing purposes.
oname = str(enzORI)[1:-1]
# Note: str(enzORI)[1:-1] == ", ".join(str(n) for n in enzORI)
print "========================="

for x in bakpeps:
    if x not in builtin_seq:
        # Make a restriction analysis for each sequence.
        anal = Restriction.Analysis(Restriction.CommOnly, \
            Seq.Seq(x, IUPAC.unambiguous_dna))
        # Store the enzymes that cut in this sequence.
        enzTMP = anal.with_sites().keys()
        enzTMPset = set(enzTMP)
        # Get the number of mutations in backpep sequence.
        y = seqcomp(builtin_seq,x)
        if enzTMPset!=enzORIset and enzORI!=None and y<=n_mut:
            print 'Original sequence enzymes: %s' % oname
            # Get a string out of the enzyme list, only for 
            # printing purposes.
            pames = str(enzTMP)[1:-1]
            print 'Proposed sequence enzymes: %s' % pames
            anal.print_as("map")
            anal.print_that()
            # o: Only in original sequences, p: proposed seq.
            o = str(list(enzORIset.difference(enzTMPset)))[1:-1]
            p = str(list(enzTMPset.difference(enzORIset)))[1:-1]
            print "Enzimes only in original sequence: %s\n" % o
            print "Enzimes only in proposed sequence: %s" % p
            print "========================="

This code is part of the book "Python for Bioinformatics", by Sebastian Bassi (sbassi@genesdigitales.com). Return to home page.