Convert TAIR FASTA files into a CSV file for building an
SQLite database.
from Bio import SeqIO

# Build TAIR.csv from two TAIR FASTA files: the full sequences file and the
# CDS file. Only IDs present in BOTH files are written, one row per ID, as
# "Seq. ID,CDS,SEQ".

# Maps sequence ID -> [full sequence], extended to [full sequence, CDS] once
# the same ID is found in the CDS file.
atD = {}

# Get all sequences from TAIR sequences file.
with open('TAIR8_seq_20080412') as seqfile:
    for record in SeqIO.parse(seqfile, "fasta"):
        # str(seq) yields the sequence text; the old `.data` attribute is
        # no longer available in current Biopython releases.
        atD[record.id] = [str(record.seq)]

# Get all sequences from TAIR CDS file.
with open('TAIR8_cds_20080412') as cdsfile:
    for record in SeqIO.parse(cdsfile, "fasta"):
        sid = record.id
        # Attach the CDS only to IDs already seen in the sequences file;
        # a CDS without a matching full sequence can never be written out.
        if sid in atD:
            # Rebuild the pair instead of appending so a repeated ID in the
            # CDS file cannot grow the list past two items (last one wins,
            # matching how repeated IDs behave in the sequences file).
            atD[sid] = [atD[sid][0], str(record.seq)]

# Write to a CSV file only the entries of the dictionary that
# have data from both sources. The `with` block guarantees the output is
# flushed and closed even if a write fails.
with open('TAIR.csv', 'w') as f_out:
    for x in atD:
        if len(atD[x]) == 2:
            # Write in this order: Seq. ID, CDS, SEQ.
            f_out.write('%s,%s,%s\n' % (x, atD[x][1], atD[x][0]))

This code is part of the book "Python for Bioinformatics", by Sebastian Bassi (sbassi@genesdigitales.com). Return to home page.