from Bio import SeqIO
from Bio.SeqRecord import SeqRecord

# Extract the region (up to 1000 bp) that precedes selected genes in a
# GenBank record and save those fragments to a FASTA file.
#
# Input : MTtabacum.gbk (file avail. at: py3.us/mt.html)
# Output: t4.txt, one FASTA entry per matching 'gene' feature.

# SeqIO.read() expects the file to hold exactly one record and raises
# ValueError otherwise. The "with" block closes the handle even on error.
with open("MTtabacum.gbk") as gbfile:
    # The genbank record is named mr
    mr = SeqIO.read(gbfile, "genbank")

seqsforfasta = []
# Qualifier values are stored as lists, so every target gene is written as
# a one-item list to allow a direct comparison with qualifiers.get('gene').
tg = (['cox2'],['atp6'],['atp9'],['cob'])
for x in mr.features:
    # Cheap type test first; features without a 'gene' qualifier yield
    # None from .get() and are skipped by the membership test.
    if x.type == 'gene' and x.qualifiers.get('gene') in tg:
        # Get the name of the gene (a one-item list, see tg above)
        genename = x.qualifiers.get('gene')
        # Get the start position. int() is used instead of the legacy
        # ".position" accessor, which Biopython documents as obsolete.
        startpos = int(x.location.start)
        # Get the required slice. The lower bound is clamped at 0: a
        # negative index would wrap around to the end of the sequence and
        # produce an empty or wrong fragment for genes that start within
        # the first 1000 bp.
        # NOTE(review): this takes the bases before location.start on the
        # forward strand; for a gene annotated on the reverse strand the
        # biological upstream region lies after location.end -- confirm
        # whether strand handling is wanted here.
        newfrag = mr.seq[max(0, startpos-1000):startpos]
        # Build a SeqRecord object. The label reports the real fragment
        # length, which is 1000 unless the slice was clamped above.
        newrec = SeqRecord(newfrag,
                           '%s %dbp upstream' % (genename[0], len(newfrag)),
                           '', '')
        seqsforfasta.append(newrec)

# Write all the sequences as a FASTA file.
with open('t4.txt','w') as outf:
    SeqIO.write(seqsforfasta,outf,'fasta')

# This code is part of the book "Python for Bioinformatics", by Sebastian Bassi (sbassi@genesdigitales.com).