from Bio import SeqIO, SeqRecord, Seq
from Bio.Alphabet import IUPAC

gbfile = open("MTtabacum.gbk") # file avail. at: py3.us/mt.html
# mr stores the genbank record.
mr = SeqIO.read(gbfile, "genbank")
seqsforfasta = []
for x in mr.features:
    # Each Genebank record is full of features, the program
    # will walk over all the features.
    qf = x.qualifiers
    # Each feature has several parameters
    # Pick selected parameters.
    if 'NADH' in qf.get('product',[''])[0] and \
    'product' in qf and 'translation' in qf:
        id = qf['db_xref'][0][3:]
        desc = qf['product'][0]
        s = Seq.Seq(qf['translation'][0],IUPAC.protein)
        # 's' is a NADH protein sequence
        srec = SeqRecord.SeqRecord(s,id=id,description=desc)
        # 'srec' is a SeqRecord object from s sequence.
        seqsforfasta.append(srec)
        # Add this SeqRecord object into seqsforfasta list.
outf = open('/home/sb/t4.txt','w')
SeqIO.write(seqsforfasta,outf,'fasta')
# Write all the sequences as a FASTA file.
outf.close()

This code is part of the book "Python for Bioinformatics", by Sebastian Bassi (sbassi@genesdigitales.com). Return to home page.