# Build the powers of two 2**0 .. 2**16 by repeated doubling,
# then show the list and the sum of its entries.
numbers = []
power = 1
for _ in range(17):
    numbers.append(power)
    power *= 2
print(numbers)
print(sum(numbers))
This can be expressed much more simply by using a so-called list comprehension, which we have not covered in the script yet:
# A list comprehension builds the same powers of two in one expression
# (shifting the single bit 1 left by k places equals 2 ** k).
numbers = [1 << exponent for exponent in range(17)]
print(numbers)
# Keep only the entries of `numbers` that lie between 1000 and 9999
# (both limits included).
filtered_numbers = []
for candidate in numbers:
    if not 1000 <= candidate <= 9999:
        continue
    filtered_numbers.append(candidate)
print(filtered_numbers)
# Read numbers from the user until they enter 'x' (upper or lower case),
# then report the minimum, maximum and average of the entered values.
numbers = []
while True:
    user_input = input("please enter a number or 'x' if you are done: ").strip()
    if user_input.lower() == "x":
        break
    try:
        numbers.append(float(user_input))
    except ValueError:
        # float() rejects empty or non-numeric text; ask again instead of
        # aborting and losing the values collected so far.
        print("this is not a valid number, try again")

if not numbers:
    # min(), max() and the division below all fail on an empty list
    print("can't compute min, max, average.")
else:
    print("the minimum of the values you entered is", min(numbers))
    print("the maximum of the values you entered is", max(numbers))
    print("the average of the values you entered is", sum(numbers) / len(numbers))
# Trial division: n is prime if it is at least 2 and no divider with
# divider * divider <= n divides it without remainder.
n = 18
divider = 2
# Numbers below 2 are not prime.  The loop below never runs for them, so
# starting from True would wrongly report them as prime.
is_prime = n >= 2
while divider * divider <= n:
    if n % divider == 0:
        is_prime = False
        break
    divider += 1

print(n, "is a prime:", is_prime)
        
# Collect all primes from 2 to 1000 using trial division.
primes = []
for candidate in range(2, 1001):
    trial = 2
    # advance until a divisor is found or trial exceeds sqrt(candidate)
    while trial * trial <= candidate and candidate % trial != 0:
        trial += 1
    # the search ran past sqrt(candidate) only if no divisor was found
    if trial * trial > candidate:
        primes.append(candidate)

print(primes)
# Collect the lines of short.fasta that start with ">" (the status lines),
# with trailing whitespace removed.
with open("short.fasta", "r") as fh:
    status_lines = [row.rstrip() for row in fh if row.startswith(">")]

# sort once and reuse the result for both outputs
ordered_status_lines = sorted(status_lines)
for entry in ordered_status_lines:
    print(entry)

# alternative: write the status lines to a csv file:
import csv
with open("sorted_status_lines.csv", "w", newline="") as fh:
    # one single-column row per status line
    csv.writer(fh).writerows([entry] for entry in ordered_status_lines)
# Find the longest sequence(s) in short.fasta and print each of them
# together with its status line.
status_lines = []
sequences = []
lengths = []

# we collect data:
# A record starts at a ">" line and runs until the next ">" line or the end
# of the file.  Blank lines are ignored, so the last record is kept even if
# the file does not end with an empty line, and a stray blank line cannot
# store a record twice.
records = []  # one [status line, list of sequence chunks] pair per record
with open("short.fasta", "r") as fh:
    for line in fh:
        line = line.rstrip()
        if line.startswith(">"):
            records.append([line, []])
        elif line and records:
            # text before the first status line belongs to no record
            records[-1][1].append(line)

for last_status, chunks in records:
    sequence = "".join(chunks)
    status_lines.append(last_status)
    sequences.append(sequence)
    lengths.append(len(sequence))

# compute the length of the longest sequence
# (default=0 avoids a ValueError when the file contains no records)
max_len = max(lengths, default=0)

# filter collected data:
for status, sequence, length in zip(status_lines, sequences, lengths):
    if length == max_len:
        print(status)
        print(sequence)
%matplotlib inline
import matplotlib.pyplot as plt
# For every start value from 2 to 9999, count how many steps of the rule
# "halve if even, else 3 * n + 1" are needed to reach 1, then plot the
# step counts against the start values.
start_values = list(range(2, 10000))
iter_counts = []
for start in start_values:
    value = start
    steps = 0
    while value != 1:
        value = value // 2 if value % 2 == 0 else 3 * value + 1
        steps += 1
    iter_counts.append(steps)

plt.figure(figsize=(15, 8))
plt.plot(start_values, iter_counts, 'g.', markersize=1)
plt.show()
# Every line of codons.txt is split on whitespace; the codons are taken
# from the fields 0, 5, 10 and 15 of each line.
codons = []
with open("codons.txt") as fh:
    for row in fh:
        parts = row.split()
        codons += [parts[0], parts[5], parts[10], parts[15]]
print(sorted(codons))
Alternative solution:
# Same result as picking the four fields one by one, but the field
# positions are iterated instead of being written out.
codons = []
with open("codons.txt") as fh:
    for row in fh:
        parts = row.split()
        codons.extend(parts[position] for position in (0, 5, 10, 15))
print(sorted(codons))
import csv

# Write one row per codon to codons.csv: the codon (fields 0, 5, 10, 15 of
# each line of codons.txt) followed by the field that comes right after it.
#
# The input is opened first so that a missing codons.txt does not leave an
# empty codons.csv behind.  newline="" is what the csv module expects for
# its file objects; without it extra blank rows can appear on platforms
# that translate line endings.
with open("codons.txt", "r") as fh_in, open("codons.csv", "w", newline="") as fh_out:
    writer = csv.writer(fh_out)
    for line in fh_in:
        fields = line.split()
        for i in [0, 5, 10, 15]:
            writer.writerow([fields[i], fields[i + 1]])

# check:
with open("codons.csv", "r", newline="") as fh:
    reader = csv.reader(fh)
    for row in reader:
        print(row)
# The first 100 Fibonacci numbers, starting with 1, 1.
fib_numbers = [1, 1]
previous, current = 1, 1
for _ in range(98):
    # slide the pair one step forward and record the new number
    previous, current = current, previous + current
    fib_numbers.append(current)

print(fib_numbers)
import csv

# Load amino_acids.csv: column 0 is stored in one_letter_codes and
# column 4, converted to float, in average_masses.  The first row is skipped.
with open("amino_acids.csv", "r") as fh:
    table = csv.reader(fh, delimiter=",")
    next(table)
    records = [(entry[0], float(entry[4])) for entry in table]

one_letter_codes = [code for code, _ in records]
average_masses = [mass for _, mass in records]

print(one_letter_codes)
print(average_masses)
import csv

# Load the one letter codes (column 0) and masses (column 4) from
# amino_acids.csv, skipping the first row.
one_letter_codes, average_masses = [], []
with open("amino_acids.csv", "r") as fh:
    table = csv.reader(fh, delimiter=",")
    next(table)
    for entry in table:
        one_letter_codes.append(entry[0])
        average_masses.append(float(entry[4]))

# Both lists are filled in parallel, so a symbol and its mass share the
# same position.
symbol = input("tell me a symbol: ")
if symbol not in one_letter_codes:
    print("this is not a valid symbol, try again !")
else:
    position = one_letter_codes.index(symbol)
    print("mass of", symbol, "is", average_masses[position])
    
I extended the exercise a bit: the code below keeps asking until the user provides a valid symbol. We can use the test
`if symbol in one_letter_codes` to check whether `symbol` is a known symbol:
import csv

# Load the one letter codes (column 0) and masses (column 4) from
# amino_acids.csv, skipping the first row.
one_letter_codes, average_masses = [], []
with open("amino_acids.csv", "r") as fh:
    table = csv.reader(fh, delimiter=",")
    next(table)
    for entry in table:
        one_letter_codes.append(entry[0])
        average_masses.append(float(entry[4]))

# Keep asking until the answer is one of the known symbols.
valid = False
while not valid:
    symbol = input("tell me a symbol: ")
    if symbol in one_letter_codes:
        valid = True
    else:
        print("this is not a valid symbol, try again !")

position = one_letter_codes.index(symbol)
print("mass of", symbol, "is", average_masses[position])
In addition to the exercise, I added some extra code to handle invalid symbols in the user's input:
import csv

# Compute the mass of an amino acid sequence entered by the user.
# Symbols that are not listed in amino_acids.csv are skipped and counted.
one_letter_codes = []
average_masses = []
with open("amino_acids.csv", "r", newline="") as fh:

    r = csv.reader(fh, delimiter=",")
    next(r)  # skip the header row

    for line in r:
        one_letter_codes.append(line[0])
        average_masses.append(float(line[4]))

sequence = input("tell me a sequence: ")
cleaned_sequence = ""
full_mass = 0
for symbol in sequence:
    if symbol in one_letter_codes:
        index = one_letter_codes.index(symbol)
        full_mass += average_masses[index]
        cleaned_sequence += symbol

skipped = len(sequence) - len(cleaned_sequence)
if skipped > 0:
    print("you provided", skipped, "invalid symbols which I skipped")

# One correction of 18.01528 is subtracted per link between neighbouring
# symbols (presumably the mass of the water released by each bond - TODO
# confirm).  A sequence without any valid symbol has no links at all;
# without the max() the count would be -1 and the correction would be
# *added*, giving an empty sequence a positive mass.
links = max(len(cleaned_sequence) - 1, 0)
sequence_mass = full_mass - links * 18.01528
print("the mass of the sequence", cleaned_sequence, "is", sequence_mass)
# Map every number from 1 to 10 to twice its value.
doubled = {number: number + number for number in range(1, 11)}
print(doubled[4])
import csv
# Build a dictionary that maps every codon of codons.txt (fields 0, 5, 10
# and 15 of each line) to the field that follows it.
#
# Only codons.txt is opened here: nothing is written in this step, and
# opening codons.csv in "w" mode would erase the existing file.
codon_to_aa = {}
with open("codons.txt", "r") as fh:
    for line in fh:
        fields = line.split()
        for i in [0, 5, 10, 15]:
            codon_to_aa[fields[i]] = fields[i + 1]

print(codon_to_aa["UUU"])
# Look up the mass of one amino acid, given by its one letter code, in a
# dictionary built from amino_acids.csv.
symbol_to_mass = {}
with open("amino_acids.csv", "r", newline="") as fh:

    r = csv.reader(fh, delimiter=",")
    next(r)  # skip the header row
    for line in r:
        symbol = line[0]
        # NOTE(review): this reads column 3, while the list based solutions
        # in this file read column 4 for the mass - confirm which column is
        # intended here.
        mass = float(line[3])
        symbol_to_mass[symbol] = mass

# Ask until the (upper-cased) answer is a single character that is a key of
# the dictionary; the lookup below would raise a KeyError otherwise.
while True:

    one_letter_code = input("tell me a one letter code: ").upper()
    if len(one_letter_code) != 1:
        print("this is not a one letter input, try again")
    elif one_letter_code not in symbol_to_mass:
        print("this is not a valid symbol, try again !")
    else:
        break

mass = symbol_to_mass[one_letter_code]
print("mass of", one_letter_code, "is", mass)
Updating the histogram here takes a slightly different approach from the one we presented in the script (compare how we updated the dictionary in the word histogram example in the script!):
# Count how often every character occurs in the entered sequence.
sequence = input("sequence ? ")
histogram = {}
for char in sequence:
    # make sure a counter exists (starting at zero), then count this one
    histogram.setdefault(char, 0)
    histogram[char] += 1

print(histogram)
This solution also skips the tail of an RNA sequence if its length is not a multiple of three. Here we use slicing, as introduced in the script about strings:
# Translate an RNA sequence into an amino acid sequence, three characters
# at a time, using the table read from codon_table.txt.
codons = {}
with open("codon_table.txt", "r") as fh:
    next(fh)  # the first line of the table is skipped
    for row in fh:
        parts = row.split()
        for position in (0, 5, 10, 15):
            codons[parts[position]] = parts[position + 1]

rna_seq = input("please provide a rna sequence: ").replace(" ", "")

# number of leading characters that form complete triplets
usable = len(rna_seq) - len(rna_seq) % 3

translated = []
for begin in range(0, usable, 3):
    triplet = rna_seq[begin:begin + 3]            ### slicing !
    # "*" stands for a triplet that is not in the table
    translated.append(codons.get(triplet, "*"))
aa_sequence = "".join(translated)

leftover = rna_seq[usable:]                       ### slicing !
if leftover:
    print("skipped tail", leftover)
print("aa sequence is", aa_sequence)
values = [1, 2, 3, 2, 7]
groups = [1, 0, 0, 1, 1]

# Collect the values of each group: values[i] belongs to groups[i].
assignments = {}
for group, value in zip(groups, values):
    assignments.setdefault(group, []).append(value)

# Report the average per group, in the order the groups first appeared.
for group, members in assignments.items():
    print("avg of group", group, "is", sum(members) / len(members))
We first create a CSV file with Python (this was not part of the exercise; it is fine if you created the file manually):
import csv

# Create grouped_data.csv with a header row and the values 0..11, each
# assigned to the group "value modulo 3".
# newline="" is what the csv module expects for its file objects; without
# it extra blank rows can appear on platforms that translate line endings.
with open("grouped_data.csv", "w", newline="") as fh:
    writer = csv.writer(fh)
    writer.writerow(["group", "value"])

    for value in range(12):
        group = value % 3
        writer.writerow([group, value])
Now we read the data from the csv file:
import csv

# Read (group, value) pairs from grouped_data.csv, skipping the header row,
# and collect the values of every group.
assignments = {}
with open("grouped_data.csv", "r") as fh:
    table = csv.reader(fh)
    next(table)
    for record in table:
        group, value = int(record[0]), int(record[1])
        assignments.setdefault(group, []).append(value)

# Report the average per group, in the order the groups first appeared.
for group, members in assignments.items():
    print("avg of group", group, "is", sum(members) / len(members))
import csv

# Copy grouped_data.csv to grouped_data_with_averages.csv and add a third
# column "average" that holds the average value of the row's group.

# The input is read once and kept in memory, so it does not have to be
# parsed a second time when the output is written.
with open("grouped_data.csv", "r", newline="") as fh_in:
    reader = csv.reader(fh_in)
    header = next(reader)
    rows = list(reader)

# collect the values of every group
assignments = {}
for row in rows:
    group = int(row[0])
    value = int(row[1])
    assignments.setdefault(group, []).append(value)

# average value per group
average = {}
for group, values in assignments.items():
    average[group] = sum(values) / len(values)

# newline="" is what the csv module expects for its file objects; without
# it extra blank rows can appear on platforms that translate line endings.
with open("grouped_data_with_averages.csv", "w", newline="") as fh_out:
    writer = csv.writer(fh_out)
    writer.writerow(header + ["average"])

    for row in rows:
        group = int(row[0])
        writer.writerow(row + [average[group]])