Example solutions for script 06_introduction_to_files

Exercise 6.2

import csv
with open("fasta_stat.csv", "w") as fh_csv:
    
    csv_writer = csv.writer(fh_csv)
    csv_writer.writerow(["description", "symbol_count", "gc content"])
    
    with open("short.fasta", "r") as fh_fasta:
    
        for line in fh_fasta:
            line = line.rstrip()
    
            if len(line) > 0 and line[0] == ">":
                last_status = line
                count = 0
                gc_count = 0
            elif line == "":
                relative_gc_count = gc_count / count
                print(count, gc_count, last_status)
                csv_writer.writerow([last_status, count, relative_gc_count])
            else:
                count += len(line)
                gc_count += line.count("G") + line.count("C")
740 441 >gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5.8S rRNA gene and ITS1 and ITS2 DNA
753 365 >gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and ITS1 and ITS2 DNA
748 427 >gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatum 5.8S rRNA gene and ITS1 and ITS2 DNA
744 354 >gi|2765655|emb|Z78530.1|CMZ78530 C.margaritaceum 5.8S rRNA gene and ITS1 and ITS2 DNA
733 351 >gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangense 5.8S rRNA gene and ITS1 and ITS2 DNA
718 364 >gi|2765652|emb|Z78527.1|CYZ78527 C.yatabeanum 5.8S rRNA gene and ITS1 and ITS2 DNA

Exercise 6.4

import csv
with open("amino_acids.csv", "r") as fh:
    for line in csv.reader(fh, delimiter=","):
        print(line)
['1-letter code', '3-letter code', 'Chemical formula', 'Monoisotopic', 'Average']
['A', 'Ala', 'C3H5ON', '89.0476750638', '89.09408']
['R', 'Arg', 'C6H12ON4', '174.11167506380002', '174.20278']
['N', 'Asn', 'C4H6O2N2', '132.0534950638', '132.11908']
['D', 'Asp', 'C4H5O3N', '133.03750506379998', '133.10388']
['C', 'Cys', 'C3H5ONS', '121.0197550638', '121.15408000000001']
['E', 'Glu', 'C5H7O3N', '147.05315506379998', '147.13078']
['Q', 'Gln', 'C5H8O2N2', '146.06914506380002', '146.14597999999998']
['G', 'Gly', 'C2H3ON', '75.0320250638', '75.06718000000001']
['H', 'His', 'C6H7ON3', '155.06947506379998', '155.15637999999998']
['I', 'Ile', 'C6H11ON', '131.09462506379998', '131.17468']
['L', 'Leu', 'C6H11ON', '131.09462506379998', '131.17468']
['K', 'Lys', 'C6H12ON2', '146.10552506379997', '146.18938']
['M', 'Met', 'C5H9ONS', '149.0510550638', '149.20788']
['F', 'Phe', 'C9H9ON', '165.0789750638', '165.19188']
['P', 'Pro', 'C5H7ON', '115.06332506380001', '115.13198']
['S', 'Ser', 'C3H5O2N', '105.04259506380001', '105.09348']
['T', 'Thr', 'C4H7O2N', '119.0582450638', '119.12038']
['W', 'Trp', 'C11H10ON2', '204.08987506379998', '204.22848']
['Y', 'Tyr', 'C9H9O2N', '181.07389506380002', '181.19127999999998']
['V', 'Val', 'C5H9ON', '117.0789750638', '117.14788']

Exercise 6.5

import csv

count = 0
mono_sum = 0.0

with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")
    next(r)
    for line in r:
        mono_sum += float(line[3])
        count += 1
            
print("average monoisotopic mass is", mono_sum / count)
average monoisotopic mass is 136.81628106379998

Exercise 6.6

import csv

count = 0
mono_sum = 0.0
max_mass = 0

with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")
    header = next(r)
    for line in r:
        mono_mass = float(line[3])
        if mono_mass > max_mass:
            max_mass = mono_mass
        mono_sum += mono_mass
        count += 1
            
print("average monoisotopic mass is", mono_sum / count)
print("max monoisotopic mass is", max_mass)
average monoisotopic mass is 136.81628106379998
max monoisotopic mass is 204.08987506379998

Exercise 6.8 (includes solution for 6.7)

(asking the user until he enters only one sympol was not part of the exercise desciption, I added it just for demonstration purposes):

while True:
    
    one_letter_code = input("tell me a one letter code: ").upper()
    if len(one_letter_code) != 1:
        print("this is not a one letter input, try again")
    else:
        break

found = False

with open("amino_acids.csv", "r") as fh:
    
    r = csv.reader(fh, delimiter=",")
    next(r)

    for line in r:
        if line[0] == one_letter_code:
            print(one_letter_code, "has monoisotopic mass", line[3], ", its molecular formula is", line[2])
            found = True

if not found:
    print("the given code is invalid")
tell me a one letter code: T
T has monoisotopic mass 119.0582450638 , its molecular formula is C4H7O2N