from Bio import SeqIO, SeqRecord, Seq
from Bio.Alphabet import IUPAC
gbfile = open("MTtabacum.gbk") # file avail. at: py3.us/mt.html
# mr stores the genbank record.
mr = SeqIO.read(gbfile, "genbank")
seqsforfasta = 
for x in mr.features:
# Each Genebank record is full of features, the program
# will walk over all the features.
qf = x.qualifiers
# Each feature has several parameters
# Pick selected parameters.
if 'NADH' in qf.get('product',['']) and \
'product' in qf and 'translation' in qf:
id = qf['db_xref'][3:]
desc = qf['product']
s = Seq.Seq(qf['translation'],IUPAC.protein)
# 's' is a NADH protein sequence
srec = SeqRecord.SeqRecord(s,id=id,description=desc)
# 'srec' is a SeqRecord object from s sequence.
# Add this SeqRecord object into seqsforfasta list.
outf = open('/home/sb/t4.txt','w')
# Write all the sequences as a FASTA file.
This code is part of the book "Python for Bioinformatics", by Sebastian Bassi (email@example.com). Return to home page.