Example solutions for script 07_container_types¶

Exercise 1.2¶

# Build the list of the powers of two 2**0 .. 2**16 by repeated doubling.
numbers = []
power = 1
for _ in range(17):
    numbers.append(power)
    power *= 2
print(numbers)
print(sum(numbers))

[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536]
131071

This can be expressed much more simply by using a so-called list comprehension, which we have not covered in the script yet:

# One expression: evaluate 2 ** exponent for every exponent 0 .. 16.
numbers = [2 ** exponent for exponent in range(17)]
print(numbers)

[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536]

Exercise 1.3¶

# Keep only the entries of `numbers` between 1000 and 9999 (inclusive),
# preserving their order.
filtered_numbers = [value for value in numbers if 1000 <= value <= 9999]
print(filtered_numbers)

[1024, 2048, 4096, 8192]

Exercise 1.4¶

# Read numbers from the user until 'x' is entered, then report the minimum,
# maximum and average of everything that was entered.
numbers = []
while True:
    user_input = input("please enter a number or 'x' if you are done: ").strip()
    if user_input.lower() == "x":
        break
    try:
        numbers.append(float(user_input))
    except ValueError:
        # float() rejects anything that is not a number (including an empty
        # line); ask again instead of aborting with a traceback.
        print("this is not a valid number, try again")

if not numbers:
    # min(), max() and the division below are undefined for an empty list
    print("can't compute min, max, average.")
else:
    print("the minimum of the values you entered is", min(numbers))
    print("the maximum of the values you entered is", max(numbers))
    print("the average of the values you entered is", sum(numbers) / len(numbers))

please enter a number or 'x' if you are done: 1
please enter a number or 'x' if you are done: 3
please enter a number or 'x' if you are done: 2
please enter a number or 'x' if you are done: x
the minimum of the values you entered is 1.0
the maximum of the values you entered is 3.0
the average of the values you entered is 2.0

Exercise 1.5¶

n = 18

# Trial division: if n has a divisor greater than 1 at all, it has one whose
# square does not exceed n, so larger candidates never need to be tried.
is_prime = True
divider = 2

# stop as soon as a divisor was found or all candidates are exhausted
while is_prime and divider * divider <= n:
    if n % divider == 0:
        is_prime = False
    else:
        divider += 1

print(n, "is a prime:", is_prime)

18 is a prime: False

Exercise 1.6¶

# Collect all primes from 2 to 1000 by trial division.
primes = []

for n in range(2, 1001):
    divider = 2
    while divider * divider <= n:
        if n % divider == 0:
            # found a proper divisor: n is not prime
            break
        divider += 1
    else:
        # the loop ended without "break", so no divisor exists
        primes.append(n)

print(primes)

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997]

Exercise 1.7¶

import csv

# The status lines of the file are the lines starting with ">".
with open("short.fasta", "r") as fh:
    status_lines = [line.rstrip() for line in fh if line.startswith(">")]

# sort once and reuse the result for both outputs
sorted_status_lines = sorted(status_lines)

for status_line in sorted_status_lines:
    print(status_line)

# alternative: write the status lines to a csv file:

with open("sorted_status_lines.csv", "w", newline="") as fh:
    # one single-column row per status line
    csv.writer(fh).writerows([status_line] for status_line in sorted_status_lines)

>gi|2765652|emb|Z78527.1|CYZ78527 C.yatabeanum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangense 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765655|emb|Z78530.1|CMZ78530 C.margaritaceum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5.8S rRNA gene and ITS1 and ITS2 DNA

Exercise 1.8¶

status_lines = []
sequences = []
lengths = []

# we collect data:
# Every line starting with ">" opens a new record; all following non-empty
# lines are appended to the sequence of that record. Because a record is
# created when its status line is read, the result does not depend on blank
# separator lines, and the last record is kept even if the file does not end
# with an empty line.

with open("short.fasta", "r") as fh:
    for line in fh:
        line = line.rstrip()
        if line.startswith(">"):
            status_lines.append(line)
            sequences.append("")
        elif line != "" and sequences:
            # sequence data before the first status line is ignored
            sequences[-1] += line

for sequence in sequences:
    lengths.append(len(sequence))

# compute the length of the longest sequence
# (default=0 avoids an exception if the file contains no record at all)
max_len = max(lengths, default=0)

# filter collected data:
for i in range(len(lengths)):
    if lengths[i] == max_len:
        print(status_lines[i])
        print(sequences[i])

>gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and ITS1 and ITS2 DNA
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAGAATATATGATCGAGTGAATCTGGAGGACCTGTGGTAACTCAGCTCGTCGTGGCACTGCTTTTGTCGTGACCCTGCTTTGTTGTTGGGCCTCCTCAAGAGCTTTCATGGCAGGTTTGAACTTTAGTACGGTGCAGTTTGCGCCAAGTCATATAAAGCATCACTGATGAATGACATTATTGTCAGAAAAAATCAGAGGGGCAGTATGCTACTGAGCATGCCAGTGAATTTTTATGACTCTCGCAACGGATATCTTGGCTCTAACATCGATGAAGAACGCAGCTAAATGCGATAAGTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCTCGAGGCCATCAGGCTAAGGGCACGCCTGCCTGGGCGTCGTGTGTTGCGTCTCTCCTACCAATGCTTGCTTGGCATATCGCTAAGCTGGCATTATACGGATGTGAATGATTGGCCCCTTGTGCCTAGGTGCGGTGGGTCTAAGGATTGTTGCTTTGATGGGTAGGAATGTGGCACGAGGTGGAGAATGCTAACAGTCATAAGGCTGCTATTTGAATCCCCCATGTTGTTGTATTTTTTCGAACCTACACAAGAACCTAATTGAACCCCAATGGAGCTAAAATAACCATTGGGCAGTTGATTTCCATTCAGATGCGACCCCAGGTCAGGCGGGGCCACCCGCTGAGTTGAGGC

Exercise 2.2 + 2.3¶

%matplotlib inline
import matplotlib.pyplot as plt


# For every start value from 2 to 9999, count how many steps of the rule
# "halve if even, else 3 * n + 1" are needed until the value 1 is reached.
start_values = list(range(2, 10000))
iter_counts = []

for start in start_values:
    n = start
    iter_count = 0
    while n != 1:
        n = n // 2 if n % 2 == 0 else 3 * n + 1
        iter_count += 1
    iter_counts.append(iter_count)

# plot the step count against the start value as small green dots
plt.figure(figsize=(15, 8))
plt.plot(start_values, iter_counts, 'g.', markersize=1)
plt.show()

Exercise 3.2¶

# Four codons are read from every line of the table; after splitting at
# whitespace they are found at the field positions 0, 5, 10 and 15.
codons = []
with open("codons.txt") as fh:
    for line in fh:
        fields = line.split()
        codons.extend([fields[0], fields[5], fields[10], fields[15]])
print(sorted(codons))

['AAA', 'AAC', 'AAG', 'AAU', 'ACA', 'ACC', 'ACG', 'ACU', 'AGA', 'AGC', 'AGG', 'AGU', 'AUA', 'AUC', 'AUG', 'AUU', 'CAA', 'CAC', 'CAG', 'CAU', 'CCA', 'CCC', 'CCG', 'CCU', 'CGA', 'CGC', 'CGG', 'CGU', 'CUA', 'CUC', 'CUG', 'CUU', 'GAA', 'GAC', 'GAG', 'GAU', 'GCA', 'GCC', 'GCG', 'GCU', 'GGA', 'GGC', 'GGG', 'GGU', 'GUA', 'GUC', 'GUG', 'GUU', 'UAA', 'UAC', 'UAG', 'UAU', 'UCA', 'UCC', 'UCG', 'UCU', 'UGA', 'UGC', 'UGG', 'UGU', 'UUA', 'UUC', 'UUG', 'UUU']

Alternative solution:

# Same extraction, with the four field positions written down only once.
codon_positions = (0, 5, 10, 15)

codons = []
with open("codons.txt") as fh:
    for line in fh:
        fields = line.split()
        codons.extend(fields[position] for position in codon_positions)
print(sorted(codons))

['AAA', 'AAC', 'AAG', 'AAU', 'ACA', 'ACC', 'ACG', 'ACU', 'AGA', 'AGC', 'AGG', 'AGU', 'AUA', 'AUC', 'AUG', 'AUU', 'CAA', 'CAC', 'CAG', 'CAU', 'CCA', 'CCC', 'CCG', 'CCU', 'CGA', 'CGC', 'CGG', 'CGU', 'CUA', 'CUC', 'CUG', 'CUU', 'GAA', 'GAC', 'GAG', 'GAU', 'GCA', 'GCC', 'GCG', 'GCU', 'GGA', 'GGC', 'GGG', 'GGU', 'GUA', 'GUC', 'GUG', 'GUU', 'UAA', 'UAC', 'UAG', 'UAU', 'UCA', 'UCC', 'UCG', 'UCU', 'UGA', 'UGC', 'UGG', 'UGU', 'UUA', 'UUC', 'UUG', 'UUU']

Exercise 3.3¶

import csv

# Convert the whitespace separated table in codons.txt into a csv file with
# two columns: the codon and the field that directly follows it.
#
# The csv module asks for newline="" when opening files; without it an extra
# "\r" is written after every row on platforms that use "\r\n" line endings.
# The input file is opened first, so codons.csv is not emptied if codons.txt
# cannot be read.
with open("codons.txt", "r") as fh_in, open("codons.csv", "w", newline="") as fh_out:
    writer = csv.writer(fh_out)

    for line in fh_in:
        fields = line.split()
        for i in [0, 5, 10, 15]:
            writer.writerow([fields[i], fields[i + 1]])


# check:
with open("codons.csv", "r", newline="") as fh:
    reader = csv.reader(fh)
    for row in reader:
        print(row)

['UUU', 'F']
['UCU', 'S']
['UAU', 'Y']
['UGU', 'C']
['UUC', 'F']
['UCC', 'S']
['UAC', 'Y']
['UGC', 'C']
['UUA', 'L']
['UCA', 'S']
['UAA', '*']
['UGA', '*']
['UUG', 'L']
['UCG', 'S']
['UAG', '*']
['UGG', 'W']
['CUU', 'L']
['CCU', 'P']
['CAU', 'H']
['CGU', 'R']
['CUC', 'L']
['CCC', 'P']
['CAC', 'H']
['CGC', 'R']
['CUA', 'L']
['CCA', 'P']
['CAA', 'Q']
['CGA', 'R']
['CUG', 'L']
['CCG', 'P']
['CAG', 'Q']
['CGG', 'R']
['AUU', 'I']
['ACU', 'T']
['AAU', 'N']
['AGU', 'S']
['AUC', 'I']
['ACC', 'T']
['AAC', 'N']
['AGC', 'S']
['AUA', 'I']
['ACA', 'T']
['AAA', 'K']
['AGA', 'R']
['AUG', 'M']
['ACG', 'T']
['AAG', 'K']
['AGG', 'R']
['GUU', 'V']
['GCU', 'A']
['GAU', 'D']
['GGU', 'G']
['GUC', 'V']
['GCC', 'A']
['GAC', 'D']
['GGC', 'G']
['GUA', 'V']
['GCA', 'A']
['GAA', 'E']
['GGA', 'G']
['GUG', 'V']
['GCG', 'A']
['GAG', 'E']
['GGG', 'G']

Exercise 4.2¶

# Start with the two seed values and keep appending the sum of the last two
# entries until the list holds 100 numbers.
fib_numbers = [1, 1]
for _ in range(100 - len(fib_numbers)):
    previous, latest = fib_numbers[-2:]
    fib_numbers.append(previous + latest)

print(fib_numbers)

[1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049, 12586269025, 20365011074, 32951280099, 53316291173, 86267571272, 139583862445, 225851433717, 365435296162, 591286729879, 956722026041, 1548008755920, 2504730781961, 4052739537881, 6557470319842, 10610209857723, 17167680177565, 27777890035288, 44945570212853, 72723460248141, 117669030460994, 190392490709135, 308061521170129, 498454011879264, 806515533049393, 1304969544928657, 2111485077978050, 3416454622906707, 5527939700884757, 8944394323791464, 14472334024676221, 23416728348467685, 37889062373143906, 61305790721611591, 99194853094755497, 160500643816367088, 259695496911122585, 420196140727489673, 679891637638612258, 1100087778366101931, 1779979416004714189, 2880067194370816120, 4660046610375530309, 7540113804746346429, 12200160415121876738, 19740274219868223167, 31940434634990099905, 51680708854858323072, 83621143489848422977, 135301852344706746049, 218922995834555169026, 354224848179261915075]

Exercise 4.3¶

import csv

# Read column 0 (one letter code) and column 4 (converted to float, the
# average mass) of every data row; the first row of the file is skipped.
with open("amino_acids.csv", "r") as fh:
    rows = csv.reader(fh, delimiter=",")
    next(rows)
    records = [(row[0], float(row[4])) for row in rows]

one_letter_codes = [code for code, _ in records]
average_masses = [mass for _, mass in records]

print(one_letter_codes)
print(average_masses)

['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
[89.09408, 174.20278, 132.11908, 133.10388, 121.15408000000001, 147.13078, 146.14597999999998, 75.06718000000001, 155.15637999999998, 131.17468, 131.17468, 146.18938, 149.20788, 165.19188, 115.13198, 105.09348, 119.12038, 204.22848, 181.19127999999998, 117.14788]

Exercise 4.4¶

import csv

one_letter_codes = []
average_masses = []

# Both lists are filled in parallel: entry i of average_masses belongs to
# entry i of one_letter_codes.
with open("amino_acids.csv", "r") as fh:
    reader = csv.reader(fh, delimiter=",")
    next(reader)  # skip the first row
    for row in reader:
        one_letter_codes.append(row[0])
        average_masses.append(float(row[4]))

symbol = input("tell me a symbol: ")
try:
    # list.index raises ValueError if the symbol is not in the list
    index = one_letter_codes.index(symbol)
except ValueError:
    print("this is not a valid symbol, try again !")
else:
    print("mass of", symbol, "is", average_masses[index])

tell me a symbol: A
mass of A is 89.09408

Below I extended the exercise a bit: the program keeps asking until the user provides a valid symbol. We can use the test symbol in one_letter_codes to check whether symbol is a known symbol:

import csv

one_letter_codes = []
average_masses = []

with open("amino_acids.csv", "r") as fh:
    reader = csv.reader(fh, delimiter=",")
    next(reader)  # skip the first row
    for row in reader:
        code, mass = row[0], float(row[4])
        one_letter_codes.append(code)
        average_masses.append(mass)

# keep asking until the answer is one of the known symbols
symbol = input("tell me a symbol: ")
while symbol not in one_letter_codes:
    print("this is not a valid symbol, try again !")
    symbol = input("tell me a symbol: ")

index = one_letter_codes.index(symbol)
print("mass of", symbol, "is", average_masses[index])

tell me a symbol: A
mass of A is 89.09408

Exercise 4.5¶

In addition to what the exercise asks for, I added some extra code to handle invalid symbols in the user's input:

import csv

one_letter_codes = []
average_masses = []

with open("amino_acids.csv", "r") as fh:

    r = csv.reader(fh, delimiter=",")
    next(r)  # skip the first row

    for line in r:
        one_letter_codes.append(line[0])
        average_masses.append(float(line[4]))


sequence = input("tell me a sequence: ")

# Sum up the masses of all known symbols; unknown symbols are left out of
# the cleaned sequence and counted as skipped.
cleaned_sequence = ""
full_mass = 0

for symbol in sequence:
    if symbol in one_letter_codes:
        index = one_letter_codes.index(symbol)
        full_mass += average_masses[index]
        cleaned_sequence += symbol

skipped = len(sequence) - len(cleaned_sequence)
if skipped > 0:
    print("you provided", skipped, "invalid symbols which I skipped")

# 18.01528 is subtracted once for every link between two neighbouring
# symbols. A sequence of k symbols has k - 1 links, but an empty sequence has
# none: without max() the count would be -1 and a mass of 18.01528 would be
# reported for an empty sequence.
link_count = max(len(cleaned_sequence) - 1, 0)
sequence_mass = full_mass - link_count * 18.01528
print("the mass of the sequence", cleaned_sequence, "is", sequence_mass)

tell me a sequence: FSYC
the mass of the sequence FSYC is 518.58488

Exercise 6.1¶

# Map every number from 1 to 10 to its double.
doubled = {number: 2 * number for number in range(1, 11)}
print(doubled[4])

8

Exercise 6.2¶

import csv

codon_to_aa = {}

# The dictionary is built directly from codons.txt. No output file is opened
# here: opening codons.csv in "w" mode without writing to it would only
# empty that file.
with open("codons.txt", "r") as fh:

    for line in fh:
        fields = line.split()

        # every codon is directly followed by the value it is mapped to
        for i in [0, 5, 10, 15]:
            codon_to_aa[fields[i]] = fields[i + 1]

print(codon_to_aa["UUU"])

F

Exercise 6.3¶

import csv

symbol_to_mass = {}

with open("amino_acids.csv", "r") as fh:

    r = csv.reader(fh, delimiter=",")
    next(r)  # skip the first row

    for line in r:
        symbol = line[0]
        # NOTE(review): this reads column 3, whereas the solutions of
        # exercise 4 read column 4 (average mass) - confirm which mass
        # column this exercise intends.
        mass = float(line[3])
        symbol_to_mass[symbol] = mass

# Ask until the input is a single letter that is a key of the dictionary, so
# that the lookup below cannot fail with a KeyError.
while True:

    one_letter_code = input("tell me a one letter code: ").upper()
    if len(one_letter_code) != 1:
        print("this is not a one letter input, try again")
    elif one_letter_code not in symbol_to_mass:
        print("this is not a valid symbol, try again !")
    else:
        break

mass = symbol_to_mass[one_letter_code]
print("mass of", one_letter_code, "is", mass)

tell me a one letter code: T
mass of T is 119.0582450638

Exercise 7.2¶

Updating the histogram here takes a slightly different approach from the one presented in the script (compare how we updated the dictionary in the word histogram example in the script!):

sequence = input("sequence ? ")

# Count how often every symbol occurs in the sequence.
histogram = {}
for symbol in sequence:
    # make sure a counter exists for this symbol, then increment it
    histogram.setdefault(symbol, 0)
    histogram[symbol] += 1

print(histogram)

sequence ? ABCDABCD
{'A': 2, 'B': 2, 'C': 2, 'D': 2}

Exercise 7.3¶

This solution also skips the tail of an RNA sequence if its length is not a multiple of three. We use slicing as introduced in the script about strings here:

codons = {}

with open("codon_table.txt", "r") as fh:
    next(fh)  # the first line of the file is skipped
    for line in fh:
        fields = line.split()
        # every codon is directly followed by the symbol it is translated to
        codons.update((fields[i], fields[i + 1]) for i in (0, 5, 10, 15))


rna_seq = input("please provide a rna sequence: ").replace(" ", "")

# only complete triplets are translated
usable_length = len(rna_seq) - len(rna_seq) % 3

aa_sequence = ""
for start in range(0, usable_length, 3):
    codon = rna_seq[start:start + 3]             ### slicing !
    # unknown codons are represented by "*"
    aa_sequence += codons.get(codon, "*")

tail = rna_seq[usable_length:]                   ### slicing !
if tail:
    print("skipped tail", tail)

print("aa sequence is", aa_sequence)

please provide a rna sequence: UUUUCUUAUUUUXXX
aa sequence is FSYF*

Exercise 8.2¶

values = [1, 2, 3, 2, 7]
groups = [1, 0, 0, 1, 1]

# Collect the values of every group: assignments maps a group number to the
# list of values which belong to this group.
assignments = {}
for group, value in zip(groups, values):
    assignments.setdefault(group, []).append(value)

# A separate name is used for the values of one group, so that the input
# list `values` is not overwritten while the averages are reported.
for group, group_values in assignments.items():
    avg = sum(group_values) / len(group_values)
    print("avg of group", group, "is", avg)

avg of group 0 is 2.5
avg of group 1 is 3.3333333333333335

Exercise 8.3¶

We first create a csv file with Python (this was not part of the exercise, it is fine if you created the file manually):

import csv
# The csv module asks for newline="" when opening a file for writing; without
# it an extra "\r" is written after every row on platforms that use "\r\n"
# line endings.
with open("grouped_data.csv", "w", newline="") as fh:
    writer = csv.writer(fh)
    writer.writerow(["group", "value"])

    # the values 0 .. 11 are assigned in turn to the groups 0, 1 and 2
    for value in range(12):
        group = value % 3
        writer.writerow([group, value])

Now we read the data from the csv file:

import csv
# Group the values of the second column by the group number in the first
# column; the first row of the file is skipped.
assignments = {}
with open("grouped_data.csv", "r") as fh:
    reader = csv.reader(fh)
    next(reader)

    for row in reader:
        group, value = int(row[0]), int(row[1])
        assignments.setdefault(group, []).append(value)

for group, values in assignments.items():
    avg = sum(values) / len(values)
    print("avg of group", group, "is", avg)

avg of group 0 is 4.5
avg of group 1 is 5.5
avg of group 2 is 6.5

Exercise 8.4¶

import csv

# Read the file once and keep the header and the data rows in memory; they
# are needed twice: for computing the averages and for writing the result.
# (newline="" is what the csv module asks for when opening files.)
with open("grouped_data.csv", "r", newline="") as fh:
    reader = csv.reader(fh)
    header = next(reader)
    rows = list(reader)

# collect the values of every group
assignments = {}
for row in rows:
    group = int(row[0])
    value = int(row[1])
    assignments.setdefault(group, []).append(value)

# compute the average of every group
average = {}
for group, values in assignments.items():
    average[group] = sum(values) / len(values)

# write every input row again, extended by the average of its group
with open("grouped_data_with_averages.csv", "w", newline="") as fh_out:
    writer = csv.writer(fh_out)
    writer.writerow(header + ["average"])

    for row in rows:
        group = int(row[0])
        writer.writerow(row + [average[group]])