#!/usr/bin/env python

import csv
import sys
import gzip

# Build a per-marker summary (chromosome plus overall start/end positions)
# from the gzipped, tab-delimited TAIR7 transcript map and save it as CSV.
f_name = 'TAIR7_Transcripts_by_map_position.gz'
out_fname = 'TAIR7.csv'
# csv.reader needs text rows on Python 3, while Python 2's gzip module only
# offers binary mode (where rows are already plain str).
gz_mode = 'rt' if sys.version_info[0] >= 3 else 'rb'
# Dictionary for storing markers and associated information:
# first column -> (column 5, smallest column 7 seen, largest column 8 seen)
atD = {}
# The with-block closes the compressed file even if a row fails to parse:
with gzip.open(f_name, gz_mode) as f_unzip:
    lines = csv.reader(f_unzip, delimiter='\t')
    next(lines, None)  # Ignore the header (also tolerates an empty file)
    # Load the dictionary using the data in the file:
    for line in lines:
        key = line[0]
        l7 = int(line[7])
        l8 = int(line[8])
        if key in atD:
            # Marker already seen: widen its stored interval if needed.
            # The stored bounds are already ints, so no re-conversion.
            chrom, left, right = atD[key]
            atD[key] = (chrom, min(left, l7), max(right, l8))
        else:
            atD[key] = (line[5], l7, l8)
# Print the content of the dictionary to a CSV file:
with open(out_fname, 'w') as o_fh:
    for x in atD:
        # Chromosome number, start position, end position
        chrom, s_pos, e_pos = atD[x]
        o_fh.write('%s,%s,%s,%s\n' % (x, chrom, s_pos, e_pos))

# This code is part of the book "Python for Bioinformatics", by Sebastian Bassi (sbassi@genesdigitales.com).