# Collect the powers of two 2**0 .. 2**16 by repeated doubling.
numbers = []
power = 1
for _ in range(17):
    numbers.append(power)
    power *= 2
print(numbers)
# total of all collected powers
print(sum(numbers))
This can be expressed much more simply with a so-called list comprehension, which we have not covered in the script yet:
# The powers of two 2**0 .. 2**16, built with a list comprehension.
numbers = [2 ** exponent for exponent in range(17)]
print(numbers)

# Keep only the four-digit values (1000 .. 9999, both ends included).
filtered_numbers = []
for candidate in numbers:
    if not 1000 <= candidate <= 9999:
        continue
    filtered_numbers.append(candidate)
print(filtered_numbers)
# Read numbers from the user until "x" is entered, then report the
# minimum, maximum and average of everything that was collected.
numbers = []
while True:
    user_input = input("please enter a number or 'x' if you are done: ").strip()
    if user_input.lower() == "x":
        break
    try:
        numbers.append(float(user_input))
    except ValueError:
        # float() rejects empty or non-numeric text; ask again instead of
        # aborting and losing the values entered so far
        print("this is not a number, try again")

if not numbers:
    # min(), max() and the division below all fail on an empty list
    print("can't compute min, max, average.")
else:
    print("the minimum of the values you entered is", min(numbers))
    print("the maximum of the values you entered is", max(numbers))
    print("the average of the values you entered is", sum(numbers) / len(numbers))
# Trial division: n is prime unless some divider with
# divider * divider <= n divides it without remainder.
n = 18
divider = 2
is_prime = True
while is_prime and divider * divider <= n:
    if n % divider:
        # remainder left, try the next candidate
        divider += 1
    else:
        # found a proper divider, the loop condition ends the search
        is_prime = False
print(n, "is a prime:", is_prime)
# Collect all primes in 2 .. 1000 using trial division per candidate.
primes = []
for n in range(2, 1001):
    divider = 2
    # advance until a divider is found or the square exceeds n
    while divider * divider <= n and n % divider != 0:
        divider += 1
    # no divider up to sqrt(n) was found exactly when the square passed n
    is_prime = divider * divider > n
    if is_prime:
        primes.append(n)
print(primes)
# Gather every line of the fasta file that starts with ">" (without
# trailing whitespace) and print those lines in sorted order.
with open("short.fasta", "r") as fh:
    status_lines = [line.rstrip() for line in fh if line.startswith(">")]

for status in sorted(status_lines):
    print(status)

# alternative: write the status lines to a csv file, one line per row:
import csv

with open("sorted_status_lines.csv", "w", newline="") as fh:
    w = csv.writer(fh)
    w.writerows([status] for status in sorted(status_lines))
# Print the status line and sequence of the longest record(s) in short.fasta.
status_lines = []
sequences = []
lengths = []

# we collect data:
last_status = None  # status line of the record currently being read
sequence = ""
with open("short.fasta", "r") as fh:
    for line in fh:
        line = line.rstrip()
        if line.startswith(">"):
            # a new status line closes the previous record, if there is one
            if last_status is not None:
                status_lines.append(last_status)
                sequences.append(sequence)
                lengths.append(len(sequence))
            last_status = line
            sequence = ""
        elif line:
            sequence += line
        # blank lines only separate records and carry no data

# the end of the file closes the last record, so it is kept even when the
# file does not end with a blank line
if last_status is not None:
    status_lines.append(last_status)
    sequences.append(sequence)
    lengths.append(len(sequence))

# compute the length of the longest sequence (0 if the file has no records)
max_len = max(lengths, default=0)

# filter collected data:
for status, seq in zip(status_lines, sequences):
    if len(seq) == max_len:
        print(status)
        print(seq)
%matplotlib inline
import matplotlib.pyplot as plt
# For every start value 2 .. 9999 count how many steps of the rule
# "halve if even, else 3 * n + 1" are needed to reach 1, then plot
# the step count against the start value.
start_values = list(range(2, 10000))
iter_counts = []
for start in start_values:
    value = start
    steps = 0
    while value != 1:
        steps += 1
        value = value // 2 if value % 2 == 0 else 3 * value + 1
    iter_counts.append(steps)

plt.figure(figsize=(15, 8))
# 'g.' draws small green point markers without connecting lines
plt.plot(start_values, iter_counts, 'g.', markersize=1)
plt.show()
# Pick the entries at columns 0, 5, 10 and 15 of every whitespace
# separated line of codons.txt and print them sorted.
codons = []
with open("codons.txt") as fh:
    for line in fh:
        fields = line.split()
        codons += [fields[0], fields[5], fields[10], fields[15]]
print(sorted(codons))
Alternative solution:
# Same result, but the four column positions are walked in a loop.
codons = []
with open("codons.txt") as fh:
    for line in fh:
        fields = line.split()
        codons.extend(fields[column] for column in (0, 5, 10, 15))
print(sorted(codons))
import csv

# Write the entries at columns i and i + 1 (i = 0, 5, 10, 15) of every line
# of codons.txt as one csv row each.
# newline="" is what the csv module asks for on files it reads or writes;
# without it, platforms that translate newlines end every row in "\r\r\n".
with open("codons.csv", "w", newline="") as fh_out:
    writer = csv.writer(fh_out)
    with open("codons.txt", "r") as fh:
        for line in fh:
            fields = line.split()
            # presumably fields[i] is a codon and fields[i + 1] its amino
            # acid; verify against the layout of codons.txt
            for i in [0, 5, 10, 15]:
                writer.writerow([fields[i], fields[i + 1]])

# check: read the file back and show every row
with open("codons.csv", "r", newline="") as fh:
    reader = csv.reader(fh)
    for row in reader:
        print(row)
# First 100 Fibonacci numbers: two seeds plus 98 sums of the last two entries.
fib_numbers = [1, 1]
for _ in range(98):
    fib_numbers.append(sum(fib_numbers[-2:]))
print(fib_numbers)
import csv

# Read column 0 (one letter code) and column 4 (stored as average mass)
# of every data row in amino_acids.csv; the first row is skipped as header.
one_letter_codes = []
average_masses = []
with open("amino_acids.csv", "r") as fh:
    rows = csv.reader(fh, delimiter=",")
    next(rows)
    for fields in rows:
        code, mass = fields[0], float(fields[4])
        one_letter_codes.append(code)
        average_masses.append(mass)

print(one_letter_codes)
print(average_masses)
import csv

# Load (code, mass) pairs from columns 0 and 4, skipping the header row.
with open("amino_acids.csv", "r") as fh:
    table = csv.reader(fh, delimiter=",")
    next(table)
    pairs = [(row[0], float(row[4])) for row in table]
one_letter_codes = [code for code, _ in pairs]
average_masses = [mass for _, mass in pairs]

# Ask once for a symbol and report its mass if the symbol is known.
symbol = input("tell me a symbol: ")
try:
    index = one_letter_codes.index(symbol)
except ValueError:
    # list.index raises ValueError for entries that are not in the list
    print("this is not a valid symbol, try again !")
else:
    print("mass of", symbol, "is", average_masses[index])
I extended the exercise a bit: the code below keeps asking until the user provides a valid symbol. We can use the test `if symbol in one_letter_codes` to check whether `symbol` is a known symbol:
import csv

# Columns 0 and 4 of every data row; the header row is skipped.
one_letter_codes = []
average_masses = []
with open("amino_acids.csv", "r") as fh:
    r = csv.reader(fh, delimiter=",")
    next(r)
    for record in r:
        code, mass = record[0], float(record[4])
        one_letter_codes.append(code)
        average_masses.append(mass)

# Keep asking until the answer is one of the known symbols.
symbol = input("tell me a symbol: ")
while symbol not in one_letter_codes:
    print("this is not a valid symbol, try again !")
    symbol = input("tell me a symbol: ")

index = one_letter_codes.index(symbol)
print("mass of", symbol, "is", average_masses[index])
In addition to the exercise, I added some extra code to handle invalid symbols in the user's input:
import csv

# Read one letter codes (column 0) and masses (column 4), skipping the header.
one_letter_codes = []
average_masses = []
with open("amino_acids.csv", "r", newline="") as fh:
    r = csv.reader(fh, delimiter=",")
    next(r)
    for line in r:
        one_letter_codes.append(line[0])
        average_masses.append(float(line[4]))

# A dict gives a direct lookup instead of searching the list for every
# symbol; setdefault keeps the first entry of a repeated code, which is
# also what list.index would return.
mass_of = {}
for code, mass in zip(one_letter_codes, average_masses):
    mass_of.setdefault(code, mass)

sequence = input("tell me a sequence: ")

# Sum the masses of all known symbols and remember which ones were used.
cleaned_sequence = ""
full_mass = 0
for symbol in sequence:
    if symbol in mass_of:
        full_mass += mass_of[symbol]
        cleaned_sequence += symbol

skipped = len(sequence) - len(cleaned_sequence)
if skipped > 0:
    print("you provided", skipped, "invalid symbols which I skipped")

# NOTE(review): 18.01528 is presumably the mass of one water molecule,
# released for every bond between neighbouring symbols - confirm.
WATER_MASS = 18.01528
# A sequence of k symbols has k - 1 bonds; an empty one has none, so the
# count must not drop to -1 (that would add a water mass to nothing).
bonds = max(len(cleaned_sequence) - 1, 0)
sequence_mass = full_mass - bonds * WATER_MASS
print("the mass of the sequence", cleaned_sequence, "is", sequence_mass)
# Map every integer 1 .. 10 to its double.
doubled = {number: 2 * number for number in range(1, 11)}
print(doubled[4])
import csv

# Build a dictionary from the entries at columns i and i + 1
# (i = 0, 5, 10, 15) of every line of codons.txt.
# Only codons.txt is read here and nothing is written, so no output file is
# opened: opening "codons.csv" in "w" mode would truncate that file.
codon_to_aa = {}
with open("codons.txt", "r") as fh:
    for line in fh:
        fields = line.split()
        for i in [0, 5, 10, 15]:
            codon_to_aa[fields[i]] = fields[i + 1]

print(codon_to_aa["UUU"])
# imported here so this part also runs on its own
import csv

# Map the one letter code (column 0) of every data row to a mass.
symbol_to_mass = {}
with open("amino_acids.csv", "r", newline="") as fh:
    r = csv.reader(fh, delimiter=",")
    next(r)  # skip the header row
    for line in r:
        symbol = line[0]
        # NOTE(review): this reads column 3, while the list based lookups in
        # this file read column 4 as the average mass - confirm which mass
        # is intended here.
        mass = float(line[3])
        symbol_to_mass[symbol] = mass

# Ask until the input is a single letter that is present in the table;
# an unknown letter would otherwise raise KeyError in the lookup below.
while True:
    one_letter_code = input("tell me a one letter code: ").upper()
    if len(one_letter_code) != 1:
        print("this is not a one letter input, try again")
    elif one_letter_code not in symbol_to_mass:
        print("this is not a valid symbol, try again !")
    else:
        break

mass = symbol_to_mass[one_letter_code]
print("mass of", one_letter_code, "is", mass)
Updating the histogram takes a slightly different approach here than the one we presented in the script (compare how we updated the dictionary in the word histogram example in the script!):
# Count how often every symbol occurs in the sequence typed by the user.
sequence = input("sequence ? ")
histogram = {}
for symbol in sequence:
    # make sure the counter exists (starting at 0), then count this symbol
    histogram.setdefault(symbol, 0)
    histogram[symbol] += 1
print(histogram)
This solution also skips the tail of an RNA sequence if its length is not a multiple of three. We use slicing, as introduced in the script about strings, here:
# Build the lookup table from codon_table.txt: the first line is skipped,
# and on every other line the entry at column i maps to the one at i + 1.
codons = {}
with open("codon_table.txt", "r") as fh:
    next(fh)
    for line in fh:
        fields = line.split()
        for offset in (0, 5, 10, 15):
            codons[fields[offset]] = fields[offset + 1]

rna_seq = input("please provide a rna sequence: ").replace(" ", "")

# Only complete triplets are translated; "usable" is the length of the
# sequence rounded down to a multiple of three.
usable = len(rna_seq) - len(rna_seq) % 3

# slicing cuts out one triplet per step; unknown triplets become "*"
aa_sequence = "".join(
    codons.get(rna_seq[start:start + 3], "*")
    for start in range(0, usable, 3)
)

if usable < len(rna_seq):
    # slicing again: everything after the last complete triplet
    print("skipped tail", rna_seq[usable:])

print("aa sequence is", aa_sequence)
# values[i] belongs to group groups[i]; collect the values per group
# and print the average of every group.
values = [1, 2, 3, 2, 7]
groups = [1, 0, 0, 1, 1]

assignments = {}
for group, value in zip(groups, values):
    assignments.setdefault(group, []).append(value)

for group, values in assignments.items():
    avg = sum(values) / len(values)
    print("avg of group", group, "is", avg)
We first create a CSV file with Python (this was not part of the exercise; it is fine if you created the file manually):
import csv

# Create grouped_data.csv with a header row and the values 0 .. 11, each
# assigned to group value % 3.
# newline="" is what the csv module asks for on files it writes; without
# it, platforms that translate newlines end every row in "\r\r\n".
with open("grouped_data.csv", "w", newline="") as fh:
    writer = csv.writer(fh)
    writer.writerow(["group", "value"])
    for value in range(12):
        group = value % 3
        writer.writerow([group, value])
Now we read the data from the csv file:
import csv

# Collect the integer values of grouped_data.csv per integer group
# (column 0: group, column 1: value; the header row is skipped).
assignments = {}
with open("grouped_data.csv", "r") as fh:
    rows = csv.reader(fh)
    next(rows)
    for row in rows:
        group, value = int(row[0]), int(row[1])
        assignments.setdefault(group, []).append(value)

# Report the average of every group.
for group, values in assignments.items():
    avg = sum(values) / len(values)
    print("avg of group", group, "is", avg)
import csv

# First pass: collect the values of every group
# (column 0: group, column 1: value; the header row is skipped).
assignments = {}
with open("grouped_data.csv", "r", newline="") as fh:
    reader = csv.reader(fh)
    next(reader)
    for row in reader:
        group = int(row[0])
        value = int(row[1])
        assignments.setdefault(group, []).append(value)

# Average per group.
average = {}
for group, values in assignments.items():
    average[group] = sum(values) / len(values)

# Second pass: copy every row to a new file and append the average of the
# row's group as an extra column.
# newline="" is what the csv module asks for on files it reads or writes;
# without it, platforms that translate newlines end written rows in "\r\r\n".
with open("grouped_data.csv", "r", newline="") as fh_in:
    reader = csv.reader(fh_in)
    header = next(reader)
    with open("grouped_data_with_averages.csv", "w", newline="") as fh_out:
        writer = csv.writer(fh_out)
        header.append("average")
        writer.writerow(header)
        for row in reader:
            group = int(row[0])
            row.append(average[group])
            writer.writerow(row)