import os
import requests
def download_sheet(url, timeout=30):
    """Download the file behind ``url`` unless a local copy already exists.

    The local file name is the last path component of ``url``; the file is
    looked up and stored relative to the current working directory.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local file name.

    Raises:
        ValueError: If no file name can be derived from ``url``.
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    file_name = os.path.basename(url)
    if not file_name:
        # e.g. a url ending in "/": there is nothing to name the file after.
        raise ValueError("cannot derive a file name from url {!r}".format(url))
    if os.path.exists(file_name):
        return file_name
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of caching an error page under the data file's name.
    response.raise_for_status()
    # Store the payload byte for byte; decoding and re-encoding it with the
    # platform's default encoding and newline handling could alter the data.
    with open(file_name, "wb") as fh:
        fh.write(response.content)
    return file_name
# Fetch the example data set. download_sheet returns the local file name and
# skips the download when a file of that name already exists.
file_name = download_sheet("https://siscourses.ethz.ch/python_challenges/data_grouped.csv")
def pretty_print(sheet, max_col_width=8):
    """Print ``sheet`` as a table with right-aligned, fixed-width columns.

    Every cell is converted with ``str``, cut off after ``max_col_width``
    characters and padded with blanks on the left. Each cell is followed by
    one blank and each row is printed on a line of its own.

    Args:
        sheet: Iterable of rows, each row an iterable of cells.
        max_col_width: Width of every printed column in characters.
    """
    for row in sheet:
        line = "".join(
            str(cell)[:max_col_width].rjust(max_col_width) + " " for cell in row
        )
        print(line)
import csv
def read_sheet(file_name):
    """Read the comma separated file ``file_name`` into a list of rows.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is the list of that
        record's cells as strings. No type conversion takes place.
    """
    # The csv module asks for ``newline=""`` so that it can process line
    # endings itself; without it, line breaks inside quoted cells are altered.
    with open(file_name, "r", newline="") as fh:
        return list(csv.reader(fh, delimiter=","))
# Load the CSV file as a list of rows; every cell is still a string here.
sheet = read_sheet(file_name)
# Visual check: print the first 10 rows.
pretty_print(sheet[:10])
def to_float(txt):
    """Convert ``txt`` to a float, or return None if that is not possible.

    Args:
        txt: Usually the text of a cell; anything accepted by ``float``
            (which includes spellings such as "nan" and "inf") is converted.

    Returns:
        The float value, or None when ``txt`` cannot be converted.
    """
    try:
        return float(txt)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (including the empty string).
        # TypeError: no text at all, e.g. None standing in for a missing cell.
        return None
# Quick check of to_float: text that is not a number gives None, so this
# prints "None 1.23".
print(to_float("niente"), to_float("1.23"))
def fix_invalid_numbers(sheet):
    """Convert the number cells of every row to floats.

    The first cell of a row is kept unchanged; every following cell is
    passed through ``to_float``, so cells that do not hold a valid number
    become None. Rows without any cells (``csv.reader`` yields such rows for
    blank lines) carry no data and are left out of the result.

    Args:
        sheet: Iterable of rows, each row a sequence of cells.

    Returns:
        A new list of new rows; ``sheet`` itself is not modified.
    """
    result = []
    for row in sheet:
        if not row:
            # Nothing to keep and no first cell to read: skip the blank row.
            continue
        result.append([row[0]] + [to_float(cell) for cell in row[1:]])
    return result
# Turn every cell after the first one of each row into a float or None.
fixed = fix_invalid_numbers(sheet)
# Visual check: print the first 10 rows.
pretty_print(fixed[:10])
def extend_rows(sheet):
    """Pad rows with None so that all rows have the same length afterwards.

    Args:
        sheet: Iterable of rows, each row a sequence of cells.

    Returns:
        A new list of new row lists, each as long as the longest row of
        ``sheet``. An empty sheet gives an empty list. ``sheet`` itself is
        not modified.
    """
    # Copy first: the rows are walked twice and the input must stay untouched.
    rows = [list(row) for row in sheet]
    # ``default=0`` keeps max() from raising on a sheet without rows.
    max_length = max((len(row) for row in rows), default=0)
    return [row + [None] * (max_length - len(row)) for row in rows]
# Pad shorter rows with None so that all rows have the same length.
fixed = extend_rows(fixed)
# Visual check: print the first 10 rows.
pretty_print(fixed[:10])
def compute_average(values):
    """Return the mean of the entries of ``values`` that are not None.

    Returns None when there is no such entry, i.e. for an empty input or an
    input that consists of None entries only.
    """
    present = [value for value in values if value is not None]
    count = len(present)
    if count == 0:
        return None
    return sum(present) / count
# Quick checks of compute_average; None entries are left out of the mean.
print(compute_average([]))  # None: no values at all
print(compute_average([None]))  # None: nothing left once None is dropped
print(compute_average([1, None, 3]))  # 2.0
print(compute_average([1, 3, 2]))  # 2.0
def strip_first_column(sheet):
    """Return a new list holding every row of ``sheet`` without its first cell."""
    remaining = []
    for row in sheet:
        remaining.append(row[1:])
    return remaining
# Drop the first cell of every row, keeping the cells that
# fix_invalid_numbers converted to float or None.
stripped = strip_first_column(fixed)
# Visual check: print the first 5 rows.
pretty_print(stripped[:5])
def column_wise_averages(sheet):
    """Compute the average of every column of ``sheet``.

    Args:
        sheet: List of rows, each row a list of cells. The first row sets
            the number of columns; every other row must be at least that
            long.

    Returns:
        A list with one entry per column, as computed by
        ``compute_average`` from that column's cells. A sheet without rows
        gives an empty list.
    """
    if not sheet:
        # No rows means no first row to take the column count from.
        return []
    n_columns = len(sheet[0])
    return [
        compute_average([row[column_index] for row in sheet])
        for column_index in range(n_columns)
    ]
# Average of every column over all rows of the stripped sheet.
averages = column_wise_averages(stripped)
print(averages)
from collections import defaultdict
def split_sheet(sheet):
    """Group the rows of ``sheet`` by the value of their first cell.

    Returns:
        A ``defaultdict(list)`` that maps each first-cell value to the list
        of the matching rows, each without its first cell, in the order in
        which the rows appear in ``sheet``.
    """
    grouped = defaultdict(list)
    for row in sheet:
        key, rest = row[0], row[1:]
        grouped[key].append(rest)
    return grouped
# Split the rows into one sheet per group; the first cell of a row serves as
# the group id.
sheets = split_sheet(fixed)
print("avail groups:", sheets.keys())
print()
print("sheet of group1:")
# NOTE: sheets is a defaultdict(list), so this lookup inserts an empty sheet
# under "group1" if no row carries that group id.
pretty_print(sheets["group1"])
# Build the result table: one row per group, made of the group id followed by
# the column averages of that group's sheet.
result_sheet = []
# NOTE: the loop rebinds the module-level names sheet, averages and row.
for group, sheet in sheets.items():
    averages = column_wise_averages(sheet)
    row = [group] + averages
    result_sheet.append(row)
pretty_print(result_sheet)