Example solutions for script 07_container_types

Exercise 1.2

# Build the list of the powers of two 2**0, 2**1, ..., 2**16 step by step.
numbers = []
for i in range(17):
    numbers.append(2 ** i)
# Show the result, this produces the list displayed below.
print(numbers)
[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536]

This can be expressed much more simply with a so-called list comprehension, which we have not covered in the script yet:

# Same list as above: one entry 2 ** i for every i in 0, 1, ..., 16.
numbers = [2 ** i for i in range(17)]
print(numbers)
[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536]

Exercise 1.3

# Keep only the numbers with exactly four digits.
# `numbers` is the list of powers of two built in exercise 1.2.
filtered_numbers = []
for number in numbers:
    if 1000 <= number <= 9999:
        filtered_numbers.append(number)
print(filtered_numbers)
[1024, 2048, 4096, 8192]

Exercise 1.4

# Ask for numbers until the user enters 'x', then report min, max and average.
numbers = []
while True:
    user_input = input("please enter a number or 'x' if you are done: ").strip()
    if user_input.lower() == "x":
        # the user is done, stop asking
        break
    # convert to float so that min, max and sum work on numbers, not on strings
    numbers.append(float(user_input))

if len(numbers) == 0:
    # min / max of an empty list and the division by len(numbers) would fail
    print("can't compute min, max, average.")
else:
    print("the minimum of the values you entered is", min(numbers))
    print("the maximum of the values you entered is", max(numbers))
    print("the average of the values you entered is", sum(numbers) / len(numbers))
please enter a number or 'x' if you are done: 1
please enter a number or 'x' if you are done: 3
please enter a number or 'x' if you are done: 2
please enter a number or 'x' if you are done: x
the minimum of the values you entered is 1.0
the maximum of the values you entered is 3.0
the average of the values you entered is 2.0

Exercise 1.5

n = 18

# Trial division: a composite number n has a divisor d with d * d <= n,
# so it is enough to test dividers up to the square root of n.
divider = 2
# numbers below 2 are not prime, the loop below never runs for them
is_prime = n >= 2

while divider * divider <= n:
    if n % divider == 0:
        is_prime = False
        # one divider is enough to know the answer
        break
    divider += 1
print(n, "is a prime:", is_prime)
18 is a prime: False

Exercise 1.6

# Collect all primes up to 1000 by applying the trial division from
# exercise 1.5 to every candidate n.
primes = []

for n in range(2, 1001):
    divider = 2
    is_prime = True
    # it is enough to test dividers up to the square root of n
    while divider * divider <= n:
        if n % divider == 0:
            is_prime = False
            # one divider is enough to know the answer
            break
        divider += 1
    if is_prime:
        primes.append(n)
print(primes)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997]

Exercise 1.7

# Collect the status lines (the lines starting with ">") of the fasta file.
status_lines = []

with open("short.fasta", "r") as fh:
    for line in fh:
        if line.startswith(">"):
            # remove the line break so that printing adds no empty lines
            status_lines.append(line.rstrip())

for line in sorted(status_lines):
    print(line)
# alternative: write the status lines to a csv file:

import csv
with open("sorted_status_lines.csv", "w", newline="") as fh:
    w = csv.writer(fh)
    for line in sorted(status_lines):
        # writerow expects a sequence of fields, here a single field per row
        w.writerow([line])
>gi|2765652|emb|Z78527.1|CYZ78527 C.yatabeanum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangense 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765655|emb|Z78530.1|CMZ78530 C.margaritaceum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and ITS1 and ITS2 DNA
>gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5.8S rRNA gene and ITS1 and ITS2 DNA

Exercise 1.8

# Three lists filled in parallel: entry i of each list describes record i.
status_lines = []
sequences = []
lengths = []

# we collect data:
# NOTE(review): this assumes that every record in short.fasta (a status line
# starting with ">" followed by the sequence lines) is terminated by an empty
# line - confirm against the file.

with open("short.fasta", "r") as fh:
    for line in fh:
        line = line.rstrip()
        if line.startswith(">"):
            # a new record starts
            last_status = line
            sequence = ""
        elif line == "":
            # the record is complete, store it
            status_lines.append(last_status)
            sequences.append(sequence)
            lengths.append(len(sequence))
        else:
            # sequences span multiple lines, glue them together
            sequence += line

# compute the length of the longest sequence
max_len = max(lengths)

# filter collected data:
for i in range(len(lengths)):
    if lengths[i] == max_len:
        print(status_lines[i])
>gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and ITS1 and ITS2 DNA

Exercise 2.2 + 2.3

%matplotlib inline
import matplotlib.pyplot as plt

# For every start value count how many steps of the rule
# "halve n if it is even, else replace it by 3 * n + 1" are needed to reach 1.
iter_counts = []
start_values = []
for start in range(2, 10000):
    # n is modified below, so the start value is kept in its own variable
    n = start
    iter_count = 0
    while n != 1:
        iter_count += 1
        if n % 2 == 0:
            n = n // 2
        else:
            n = 3 * n + 1
    start_values.append(start)
    iter_counts.append(iter_count)
# one small green dot per start value
plt.figure(figsize=(15, 8))
plt.plot(start_values, iter_counts, 'g.', markersize=1)

Exercise 3.2

# NOTE(review): assumes that every line of codons.txt holds four entries of
# five fields each and that the codon is the first field of an entry (this
# matches the indices 0, 5, 10, 15 used in the alternative solution) - confirm.
codons = []
with open("codons.txt") as fh:
    for line in fh:
        fields = line.split()
        codons.append(fields[0])
        codons.append(fields[5])
        codons.append(fields[10])
        codons.append(fields[15])
# the output below is in alphabetical order, so we sort before printing
print(sorted(codons))
['AAA', 'AAC', 'AAG', 'AAU', 'ACA', 'ACC', 'ACG', 'ACU', 'AGA', 'AGC', 'AGG', 'AGU', 'AUA', 'AUC', 'AUG', 'AUU', 'CAA', 'CAC', 'CAG', 'CAU', 'CCA', 'CCC', 'CCG', 'CCU', 'CGA', 'CGC', 'CGG', 'CGU', 'CUA', 'CUC', 'CUG', 'CUU', 'GAA', 'GAC', 'GAG', 'GAU', 'GCA', 'GCC', 'GCG', 'GCU', 'GGA', 'GGC', 'GGG', 'GGU', 'GUA', 'GUC', 'GUG', 'GUU', 'UAA', 'UAC', 'UAG', 'UAU', 'UCA', 'UCC', 'UCG', 'UCU', 'UGA', 'UGC', 'UGG', 'UGU', 'UUA', 'UUC', 'UUG', 'UUU']

Alternative solution:

codons = []
with open("codons.txt") as fh:
    for line in fh:
        fields = line.split()
        # positions of the codons within the fields of one line
        for i in [0, 5, 10, 15]:
            codons.append(fields[i])
# the output below is in alphabetical order, so we sort before printing
print(sorted(codons))
['AAA', 'AAC', 'AAG', 'AAU', 'ACA', 'ACC', 'ACG', 'ACU', 'AGA', 'AGC', 'AGG', 'AGU', 'AUA', 'AUC', 'AUG', 'AUU', 'CAA', 'CAC', 'CAG', 'CAU', 'CCA', 'CCC', 'CCG', 'CCU', 'CGA', 'CGC', 'CGG', 'CGU', 'CUA', 'CUC', 'CUG', 'CUU', 'GAA', 'GAC', 'GAG', 'GAU', 'GCA', 'GCC', 'GCG', 'GCU', 'GGA', 'GGC', 'GGG', 'GGU', 'GUA', 'GUC', 'GUG', 'GUU', 'UAA', 'UAC', 'UAG', 'UAU', 'UCA', 'UCC', 'UCG', 'UCU', 'UGA', 'UGC', 'UGG', 'UGU', 'UUA', 'UUC', 'UUG', 'UUU']

Exercise 3.3

import csv

# Write one csv row per codon: the codon and the field following it in codons.txt.
# newline="" is what the csv module asks for when opening files, without it
# extra empty lines can show up on Windows.
with open("codons.csv", "w", newline="") as fh_out:
    writer = csv.writer(fh_out)

    with open("codons.txt", "r") as fh:
        for line in fh:
            fields = line.split()
            # positions of the codons within the fields of one line
            for i in [0, 5, 10, 15]:
                writer.writerow([fields[i], fields[i + 1]])

# check:
with open("codons.csv", "r", newline="") as fh:
    reader = csv.reader(fh)
    for row in reader:
        print(row)
['UUU', 'F']
['UCU', 'S']
['UAU', 'Y']
['UGU', 'C']
['UUC', 'F']
['UCC', 'S']
['UAC', 'Y']
['UGC', 'C']
['UUA', 'L']
['UCA', 'S']
['UAA', '*']
['UGA', '*']
['UUG', 'L']
['UCG', 'S']
['UAG', '*']
['UGG', 'W']
['CUU', 'L']
['CCU', 'P']
['CAU', 'H']
['CGU', 'R']
['CUC', 'L']
['CCC', 'P']
['CAC', 'H']
['CGC', 'R']
['CUA', 'L']
['CCA', 'P']
['CAA', 'Q']
['CGA', 'R']
['CUG', 'L']
['CCG', 'P']
['CAG', 'Q']
['CGG', 'R']
['AUU', 'I']
['ACU', 'T']
['AAU', 'N']
['AGU', 'S']
['AUC', 'I']
['ACC', 'T']
['AAC', 'N']
['AGC', 'S']
['AUA', 'I']
['ACA', 'T']
['AAA', 'K']
['AGA', 'R']
['AUG', 'M']
['ACG', 'T']
['AAG', 'K']
['AGG', 'R']
['GUU', 'V']
['GCU', 'A']
['GAU', 'D']
['GGU', 'G']
['GUC', 'V']
['GCC', 'A']
['GAC', 'D']
['GGC', 'G']
['GUA', 'V']
['GCA', 'A']
['GAA', 'E']
['GGA', 'G']
['GUG', 'V']
['GCG', 'A']
['GAG', 'E']
['GGG', 'G']

Exercise 4.2

# Start with the first two Fibonacci numbers and append the sum of the
# last two entries until the list holds 100 numbers.
fib_numbers = [1, 1]
while len(fib_numbers) < 100:
    fib_numbers.append(fib_numbers[-2] + fib_numbers[-1])
print(fib_numbers)
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, 433494437, 701408733, 1134903170, 1836311903, 2971215073, 4807526976, 7778742049, 12586269025, 20365011074, 32951280099, 53316291173, 86267571272, 139583862445, 225851433717, 365435296162, 591286729879, 956722026041, 1548008755920, 2504730781961, 4052739537881, 6557470319842, 10610209857723, 17167680177565, 27777890035288, 44945570212853, 72723460248141, 117669030460994, 190392490709135, 308061521170129, 498454011879264, 806515533049393, 1304969544928657, 2111485077978050, 3416454622906707, 5527939700884757, 8944394323791464, 14472334024676221, 23416728348467685, 37889062373143906, 61305790721611591, 99194853094755497, 160500643816367088, 259695496911122585, 420196140727489673, 679891637638612258, 1100087778366101931, 1779979416004714189, 2880067194370816120, 4660046610375530309, 7540113804746346429, 12200160415121876738, 19740274219868223167, 31940434634990099905, 51680708854858323072, 83621143489848422977, 135301852344706746049, 218922995834555169026, 354224848179261915075]

Exercise 4.3

import csv

# Lists to be filled from amino_acids.csv; the output below shows 20 entries for each.
one_letter_codes = []
average_masses = []

with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")

    for line in r:
        # NOTE(review): the body of this loop and the statements printing the
        # two lists are missing in this copy of the solution. The body has to
        # append the one letter code and the average mass (as float) of the
        # current row; which columns hold them cannot be seen here - check
        # amino_acids.csv.
['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
[89.09408, 174.20278, 132.11908, 133.10388, 121.15408000000001, 147.13078, 146.14597999999998, 75.06718000000001, 155.15637999999998, 131.17468, 131.17468, 146.18938, 149.20788, 165.19188, 115.13198, 105.09348, 119.12038, 204.22848, 181.19127999999998, 117.14788]

Exercise 4.4

import csv

# Lists to be filled from amino_acids.csv; the code below looks up the mass
# for a symbol via the position of the symbol in one_letter_codes.
one_letter_codes = []
average_masses = []

with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")
    for line in r:
        # NOTE(review): the body of this loop is missing in this copy of the
        # solution. It has to append the one letter code and the average mass
        # (as float) of the current row; which columns hold them cannot be
        # seen here - check amino_acids.csv.

symbol = input("tell me a symbol: ")
if symbol in one_letter_codes:
    # both lists have the same order, so the position of the symbol is also
    # the position of its mass
    index = one_letter_codes.index(symbol)
    print("mass of", symbol, "is", average_masses[index])
else:
    print("this is not a valid symbol, try again !")
tell me a symbol: A
mass of A is 89.09408

I extended the exercise a bit: the version below keeps asking until the user provides a valid symbol. We can use the test `symbol in one_letter_codes` to check whether symbol is a known symbol:

import csv

# Lists to be filled from amino_acids.csv; the code below looks up the mass
# for a symbol via the position of the symbol in one_letter_codes.
one_letter_codes = []
average_masses = []

with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")
    for line in r:
        # NOTE(review): the body of this loop is missing in this copy of the
        # solution. It has to append the one letter code and the average mass
        # (as float) of the current row; which columns hold them cannot be
        # seen here - check amino_acids.csv.

# Keep asking until the user enters a known symbol.
while True:
    symbol = input("tell me a symbol: ")
    if symbol in one_letter_codes:
        # valid symbol, stop asking
        break
    print("this is not a valid symbol, try again !")
# both lists have the same order, so the position of the symbol is also the
# position of its mass
index = one_letter_codes.index(symbol)
print("mass of", symbol, "is", average_masses[index])
tell me a symbol: A
mass of A is 89.09408

Exercise 4.5

In addition to what the exercise asks for, I added some extra code to handle invalid symbols in the user's input:

import csv

# Lists to be filled from amino_acids.csv; the code below looks up the mass
# for a symbol via the position of the symbol in one_letter_codes.
one_letter_codes = []
average_masses = []

with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")
    for line in r:
        # NOTE(review): the body of this loop is missing in this copy of the
        # solution. It has to append the one letter code and the average mass
        # (as float) of the current row; which columns hold them cannot be
        # seen here - check amino_acids.csv.

sequence = input("tell me a sequence: ")

cleaned_sequence = ""
full_mass = 0

# Sum up the masses of all known symbols and remember which symbols we used.
for symbol in sequence:
    if symbol in one_letter_codes:
        index = one_letter_codes.index(symbol)
        full_mass += average_masses[index]
        cleaned_sequence += symbol

skipped = len(sequence) - len(cleaned_sequence)
if skipped > 0:
    print("you provided", skipped, "invalid symbols which I skipped")

if cleaned_sequence == "":
    # Without this check the formula below would subtract -1 * 18.01528 and
    # report a mass of 18.01528 for an empty sequence.
    print("no valid symbols left, can't compute the mass of the sequence.")
else:
    # 18.01528 is subtracted once per link between two neighbouring symbols
    # (presumably the average mass of the water molecule released per
    # peptide bond - confirm).
    sequence_mass = full_mass - (len(cleaned_sequence) - 1) * 18.01528
    print("the mass of the sequence", cleaned_sequence, "is", sequence_mass)
tell me a sequence: FSYC
the mass of the sequence FSYC is 518.58488

Exercise 6.1