We use the requests library to fetch the data within Python.

The next command, which starts with "!", only works within Jupyter, the tool I use for these scripts. Outside of Jupyter you might need to install requests differently.

!pip install requests

Requirement already satisfied: requests in ./venv/lib/python3.6/site-packages
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in ./venv/lib/python3.6/site-packages (from requests)
Requirement already satisfied: certifi>=2017.4.17 in ./venv/lib/python3.6/site-packages (from requests)
Requirement already satisfied: urllib3<1.23,>=1.21.1 in ./venv/lib/python3.6/site-packages (from requests)
Requirement already satisfied: idna<2.7,>=2.5 in ./venv/lib/python3.6/site-packages (from requests)

import requests

def fetch_data():
    """Fetch the codon table over the internet and return it as text.

    Returns:
        str: the decoded body of the HTTP response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer in time.
    """
    # verify=False to circumvent https security issues, may cause an
    # InsecureRequestWarning.
    response = requests.get(
        "https://siscourses.ethz.ch/python_dbiol/data/codon_table.txt",
        verify=False,
        timeout=30,  # without a timeout the call could block forever
    )
    # fail loudly instead of handing the text of an error page to the parser
    response.raise_for_status()
    return response.text

# you might see a warning if you run the following line, you can ignore this warning:
# print(fetch_data()[:300])

Below I use the fact that Python treats empty strings as False. (The same holds for empty lists, tuples, dictionaries and sets, as well as for the values 0, 0.0 and None):

def build_mapping(text):
    """Create the lookup table rna codon -> aa symbol from the
    downloaded text file.

    The parser is very dependent on the actual formatting of the text:
    the first line is skipped as header, every other non empty line is
    split at single spaces, and the fields are read in groups of five
    where the first field of a group is used as key and the second
    field as value.
    """
    codon_to_aa = {}
    rows = text.split("\n")[1:]    # the first line is the header

    for raw_row in rows:
        row = raw_row.strip()      # remove surrounding whitespace
        if not row:
            continue               # nothing to parse on empty lines
        columns = row.split(" ")
        # a new entry starts every five columns: 0/1, 5/6, 10/11, and so on:
        for start in range(0, len(columns), 5):
            codon = columns[start]
            codon_to_aa[codon] = columns[start + 1]
    return codon_to_aa

# print(build_mapping(fetch_data()))

def read_fasta(path):
    """Read a fasta file from the given path.

    Args:
        path: location of the fasta file.

    Returns:
        A list of tuples in file order. The first entry of every tuple is
        the identifier line (including the leading ">"), the second entry
        is the actual sequence, i.e. the concatenation of all lines up to
        the next identifier line. Identifier lines which are not followed
        by any sequence data are skipped.

    Raises:
        ValueError: if sequence data appears before the first identifier
            line.

    Implementation: every time we see an identifier line we store the
    previously seen sequence (if there was one).
    """

    sequences = []
    status = None    # identifier line of the entry we currently collect
    chunks = []      # sequence lines seen since that identifier line

    with open(path, "r") as fh:
        for line in fh:
            line = line.rstrip()        # remove trailing \n
            if line.startswith(">"):
                if chunks:
                    # record what we have seen so far:
                    sequences.append((status, "".join(chunks)))
                    chunks = []
                status = line
            elif line:
                if status is None:
                    # without an identifier line there is nothing we could
                    # attach this data to
                    raise ValueError(
                        f"{path}: sequence data found before the first "
                        "'>' identifier line"
                    )
                # collect the pieces and join once, this avoids repeated
                # copying of a growing string
                chunks.append(line)

    # don't ignore the last entry:
    if chunks:
        sequences.append((status, "".join(chunks)))
    return sequences
              
# print(read_fasta("rna_fake.fasta")[:3])

def translate_rna_to_aa(rna_sequence, mapping):
    """Translate a rna sequence into a string of aa symbols.

    The sequence is cut into consecutive pieces of three characters starting
    at position 0 (the last piece may be shorter). Every piece is looked up
    in mapping, pieces which are not a key in mapping are translated to "*".
    """
    codon_starts = range(0, len(rna_sequence), 3)
    codons = (rna_sequence[start: start + 3] for start in codon_starts)
    return "".join(mapping.get(codon, "*") for codon in codons)

# mapping = build_mapping(fetch_data())
# sequence = read_fasta("rna_fake.fasta")[0][1]
# translate_rna_to_aa(sequence, mapping)

I build the result file line by line using a list of strings. Finally the result is constructed with the join method of strings:

def translate_fasta_file(in_path, out_path):
    """Translate all rna sequences of a fasta file to aa sequences.

    The codon table is downloaded first, then all entries are read from
    in_path. For every entry the identifier line and the translated
    sequence are written to out_path, separated by "\\n" (no "\\n" after
    the last line).
    """
    codon_to_aa = build_mapping(fetch_data())

    output = []
    for header, rna in read_fasta(in_path):
        # two output lines per entry: identifier first, then translation
        output.extend((header, translate_rna_to_aa(rna, codon_to_aa)))

    with open(out_path, "w") as out_file:
        out_file.write("\n".join(output))

# run the translation: reads rna_fake.fasta and writes aa_fake.fasta
translate_fasta_file("rna_fake.fasta", "aa_fake.fasta")

/Users/uweschmitt/Projects/python3-course-advanced/venv3.6/lib/python3.6/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)

# first ten lines, only works within jupyter
!head aa_fake.fasta

>gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLMRPWNKRSSESGGPVYSAHRGHCSRGDPDLLLGRLGSVHGGFEPLARRSLGAKPYESITGEWHCLPQNPERRRAVACPMNFDDSRKRESWLFASDGRTQRNAISGVNCKIP*TIESFERKLRPRPSG*GHACLGVALRLSPANACPAYSQAGVVRM*KIGPLCLGAAGPRAGVLMARNPARGGRMLAAAAVRIPHVVVLVGQAGEPFRTPMEGG*PPFGCDPRSGGGTR*VY*
>gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLLRQQNI*SSESGGPVVTQLVVALLLS*PCFVVGPPQELSWQV*TLVRCSLRQVI*SITDE*HYCQKKSEGQYATEHASEFL*LSQRISWL*HR*RTQLNAISGVNCRIP*TIESLNASCARGHQAKGTPAWASCVASLLPMLAWHIAKLALYGCE*LAPCA*VRWV*GLLL*WVGMWHEVENANSHKAAI*IPHVVVFFRTYTRT*LNPNGAKITIGQLISIQMRPQVRRGHPLS*G
>gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLLRQQNIRSSESGGPVVTRLTVALLSW*TRFATGPPRELSWRV*TSSAAQFAPSHMERHRWMAFLSRKTRRGGVCCACQ*IYDDSRQRDIWLLHR*RTQRNAISGVNCRIPRTIESLNASCARGHQAKGTPAWASCAASLLIMLDWHAASLSL*GRERLAPCA*VRRV*ASVF*WPGTWQ*VEDAGSRKAAVRIPRVVVLVRPTEEPV*TPSGRKTALGR*FPFRCDPSQAGHP*V*
>gi|2765655|emb|Z78530.1|CMZ78530 C.margaritaceum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLLKQHNKRLSESGGLVVIWLARDILLW*P*FVIGPH*ELSWRV*TSSTVQFAPRYIKNHR*MTLLPHTTSEVWCVNVHSNEF**LSADGYLDSCIDEERTEM**VV*IAESREPSSL*TQVAPEAIRLRARLPGRRMFYLSFQCLSSI*LGHHCVDVKDWPLVLRCGGSKDMCFDGLKLGKRWRMLAAARLLFESPMLSCLLGL*NNLFGP*LRQNNPWVVDFQSDATPVRGPPH
>gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangense 5.8S rRNA gene and IUS1 and IUS2 RNA
TASCRRTLLRQQNIRLSESGGLVVIWLARDFLLW*P*FVIGPH*ELSWRV*TSSTVQFAPRYIKNHR*MTLLSKKSEVWCVIGHANELLMTLAEGYLGSCIDEESHRNVISGVNCRIP*TIESLNASCARGHQAKGTPAWASYVLSLLPMLVQHIARPSLCGCERLAPCA*VRWV*GYVF*WSETWQEVEDAGSRKAIV*IPHVVIFVGPIEQLVWTLIKAKQSLGG*FPIRCDPSQRATS*AK*

Solution using generators.

Here comes a modified version which saves memory by not reading all sequences into memory first; instead, sequences are read "on demand". This allows processing of very huge files which would not fit into your computer's memory.

The implementation below uses so-called "generators", which are explained in the proposed solution for the "sum formula fit" challenge.

def read_fasta_generator(path):
    """Read a fasta file from the given path entry by entry.

    This is a generator: the file is opened when the iteration starts and
    only the entry currently collected is kept in memory.

    Args:
        path: location of the fasta file.

    Yields:
        Tuples in file order. The first entry of every tuple is the
        identifier line (including the leading ">"), the second entry is
        the actual sequence, i.e. the concatenation of all lines up to the
        next identifier line. Identifier lines which are not followed by
        any sequence data are skipped.

    Raises:
        ValueError: during iteration, if sequence data appears before the
            first identifier line.

    Implementation: every time we see an identifier line we yield the
    previously seen sequence (if there was one).
    """

    status = None    # identifier line of the entry we currently collect
    chunks = []      # sequence lines seen since that identifier line

    with open(path, "r") as fh:
        for line in fh:
            line = line.rstrip()        # remove trailing \n
            if line.startswith(">"):
                if chunks:
                    # hand out what we have seen so far:
                    yield (status, "".join(chunks))
                    chunks = []
                status = line
            elif line:
                if status is None:
                    # without an identifier line there is nothing we could
                    # attach this data to
                    raise ValueError(
                        f"{path}: sequence data found before the first "
                        "'>' identifier line"
                    )
                # collect the pieces and join once, this avoids repeated
                # copying of a growing string
                chunks.append(line)

    # don't ignore the last entry:
    if chunks:
        yield (status, "".join(chunks))

        
# uncomment for testing:
#
# for status, seq in read_fasta_generator("rna_fake.fasta"):
#    print(status)
#    print(seq)
#    break

def translate_fasta_file_efficient(in_path, out_path):
    """Translate all rna sequences of a fasta file to aa sequences.

    This solution avoids holding all data in memory so we
    can process FASTA files which do not fit into memory !

    The codon table is downloaded first. Then the entries of in_path are
    read one at a time, and for every entry the identifier line and the
    translated sequence are written to out_path, each followed by "\\n".
    The output file is opened (and thereby truncated) before the first
    entry is read from in_path.
    """

    mapping = build_mapping(fetch_data())

    # the entries must only be consumed from the generator, reading them
    # into a list first would load the whole file into memory
    with open(out_path, "w") as fh:
        for identifier, rna_sequence in read_fasta_generator(in_path):
            aa_sequence = translate_rna_to_aa(rna_sequence, mapping)
            print(identifier, file=fh)
            print(aa_sequence, file=fh)
            
# run the memory saving variant on the same files, this overwrites
# aa_fake.fasta
translate_fasta_file_efficient("rna_fake.fasta", "aa_fake.fasta")

/Users/uweschmitt/Projects/python3-course-advanced/venv3.6/lib/python3.6/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)

# first ten lines, only works within jupyter
!head aa_fake.fasta

>gi|2765658|emb|Z78533.1|CIZ78533 C.irapeanum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLMRPWNKRSSESGGPVYSAHRGHCSRGDPDLLLGRLGSVHGGFEPLARRSLGAKPYESITGEWHCLPQNPERRRAVACPMNFDDSRKRESWLFASDGRTQRNAISGVNCKIP*TIESFERKLRPRPSG*GHACLGVALRLSPANACPAYSQAGVVRM*KIGPLCLGAAGPRAGVLMARNPARGGRMLAAAAVRIPHVVVLVGQAGEPFRTPMEGG*PPFGCDPRSGGGTR*VY*
>gi|2765657|emb|Z78532.1|CCZ78532 C.californicum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLLRQQNI*SSESGGPVVTQLVVALLLS*PCFVVGPPQELSWQV*TLVRCSLRQVI*SITDE*HYCQKKSEGQYATEHASEFL*LSQRISWL*HR*RTQLNAISGVNCRIP*TIESLNASCARGHQAKGTPAWASCVASLLPMLAWHIAKLALYGCE*LAPCA*VRWV*GLLL*WVGMWHEVENANSHKAAI*IPHVVVFFRTYTRT*LNPNGAKITIGQLISIQMRPQVRRGHPLS*G
>gi|2765656|emb|Z78531.1|CFZ78531 C.fasciculatum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLLRQQNIRSSESGGPVVTRLTVALLSW*TRFATGPPRELSWRV*TSSAAQFAPSHMERHRWMAFLSRKTRRGGVCCACQ*IYDDSRQRDIWLLHR*RTQRNAISGVNCRIPRTIESLNASCARGHQAKGTPAWASCAASLLIMLDWHAASLSL*GRERLAPCA*VRRV*ASVF*WPGTWQ*VEDAGSRKAAVRIPRVVVLVRPTEEPV*TPSGRKTALGR*FPFRCDPSQAGHP*V*
>gi|2765655|emb|Z78530.1|CMZ78530 C.margaritaceum 5.8S rRNA gene and IUS1 and IUS2 RNA
RNKVSVGEPAEGSLLKQHNKRLSESGGLVVIWLARDILLW*P*FVIGPH*ELSWRV*TSSTVQFAPRYIKNHR*MTLLPHTTSEVWCVNVHSNEF**LSADGYLDSCIDEERTEM**VV*IAESREPSSL*TQVAPEAIRLRARLPGRRMFYLSFQCLSSI*LGHHCVDVKDWPLVLRCGGSKDMCFDGLKLGKRWRMLAAARLLFESPMLSCLLGL*NNLFGP*LRQNNPWVVDFQSDATPVRGPPH
>gi|2765654|emb|Z78529.1|CLZ78529 C.lichiangense 5.8S rRNA gene and IUS1 and IUS2 RNA
TASCRRTLLRQQNIRLSESGGLVVIWLARDFLLW*P*FVIGPH*ELSWRV*TSSTVQFAPRYIKNHR*MTLLSKKSEVWCVIGHANELLMTLAEGYLGSCIDEESHRNVISGVNCRIP*TIESLNASCARGHQAKGTPAWASYVLSLLPMLVQHIARPSLCGCERLAPCA*VRWV*GYVF*WSETWQEVEDAGSRKAIV*IPHVVIFVGPIEQLVWTLIKAKQSLGG*FPIRCDPSQRATS*AK*