# Author: Stanislav Geidl
# C2184 Introduction to Programming in Python
# Exercise 06 - solution

import doctest

# Characters accepted as DNA bases (both lower and upper case).
_DNA_BASES = frozenset("acgtACGT")

# Lower-case base -> upper-case complementary base.
_COMPLEMENTS = {'a': 'T', 'c': 'G', 'g': 'C', 't': 'A'}

# Codon table; credits to Jaroslav Velcovsky.
# Retrieved from
# http://www.petercollingridge.co.uk/python-bioinformatics-tools/codon-table
# The 64 codons are enumerated in T, C, A, G order and paired position by
# position with the amino-acid string; '-' is the entry for TAA, TAG and TGA.
# Built once at import time instead of on every translate_codon() call.
_CODON_BASES = ['T', 'C', 'A', 'G']
_CODONS = [a + b + c
           for a in _CODON_BASES
           for b in _CODON_BASES
           for c in _CODON_BASES]
_AMINO_ACIDS = ('FFLLSSSSYY--CC-WLLLLPPPPHHQQRRRRIIIMTTTT'
                'NNKKSSRRVVVVAAAADDEEGGGG')
_CODON_TABLE = dict(zip(_CODONS, _AMINO_ACIDS))


def is_dna_sequence(sequence):
    """
    Return True if the sequence consists only of the letters a, c, g, t.

    The check is case-insensitive; an empty sequence is considered valid.

    >>> is_dna_sequence('agctagtacgtacgtacgata')
    True
    >>> is_dna_sequence('GACTTACGATCGACTGATCGA')
    True
    >>> is_dna_sequence('GACTTACGATCagtctatcGA')
    True
    >>> is_dna_sequence('agctagtacgtfcgtacgata')
    False
    >>> is_dna_sequence('agctagtacgtfcgtacg ta')
    False
    >>> is_dna_sequence('>agctagtacgtfcgtacgaa')
    False
    """
    # Alternative approaches discussed in class: compare len(sequence) with
    # the summed counts of 'a', 'c', 'g', 't' in sequence.lower(), or loop
    # over sequence.lower() and return False on the first foreign letter.
    return all(base in _DNA_BASES for base in sequence)


def reverse_complement_sequence(sequence):
    """
    Return the reverse complement of a DNA sequence, in upper case.

    The sequence is reversed and every base is replaced by its complement:
    a -> T, t -> A, c -> G, g -> C. Input case is ignored.

    Raises KeyError if the sequence contains a character other than
    a, c, g, t (in either case).

    >>> reverse_complement_sequence('agtagtagt')
    'ACTACTACT'
    >>> reverse_complement_sequence('GACGCAGTGGATCCGTACAATAG')
    'CTATTGTACGGATCCACTGCGTC'
    """
    # Alternative approaches discussed in class: chained str.replace() calls
    # on the reversed lower-case string, or an if/elif chain over the bases
    # visited with negative indices.
    return "".join(_COMPLEMENTS[base] for base in sequence[::-1].lower())


def reading_frames(sequence):
    """
    Return the 6 reading frames used for translation into protein.

    The first three frames come from the original sequence, the last three
    from its reverse complement. Frames 1 and 4 are the full strand, frames
    2 and 5 are the strand without its first base, and frames 3 and 6 are
    the strand without its first two bases. All frames are upper case.

    >>> reading_frames('agtagtagt')
    ['AGTAGTAGT', 'GTAGTAGT', 'TAGTAGT', 'ACTACTACT', 'CTACTACT', 'TACTACT']
    >>> reading_frames('GACGCAGTGGATCCGTACAATAG')
    ['GACGCAGTGGATCCGTACAATAG', 'ACGCAGTGGATCCGTACAATAG', 'CGCAGTGGATCCGTACAATAG', 'CTATTGTACGGATCCACTGCGTC', 'TATTGTACGGATCCACTGCGTC', 'ATTGTACGGATCCACTGCGTC']
    """
    forward = sequence.upper()
    reverse = reverse_complement_sequence(forward)
    # Outer loop over the strands keeps the order: forward 0, 1, 2 and then
    # reverse complement 0, 1, 2.
    return [strand[offset:]
            for strand in (forward, reverse)
            for offset in range(3)]


def translate_codon(codon):
    """
    Return the one-letter amino-acid code for an upper-case codon.

    Returns '-' for TAA, TAG and TGA, and '?' for anything that is not one
    of the 64 codons in the table (wrong length, lower case, foreign
    letters).

    >>> translate_codon('ATG')
    'M'
    >>> translate_codon('TAA')
    '-'
    >>> translate_codon('NNN')
    '?'
    """
    return _CODON_TABLE.get(codon, "?")


def translate(sequence):
    """
    Translate a DNA sequence into protein sequences for all 6 reading frames.

    Returns a list of 6 strings, one per frame as produced by
    reading_frames(); trailing bases that do not form a complete codon are
    ignored. Returns None if the input is not a valid DNA sequence.

    >>> translate('agtagtagt')
    ['SSS', 'VV', '--', 'TTT', 'LL', 'YY']
    >>> translate('GACGCAGTGGATCCGTACAATAG')
    ['DAVDPYN', 'TQWIRTI', 'RSGSVQ-', 'LLYGSTA', 'YCTDPLR', 'IVRIHCV']
    """
    if not is_dna_sequence(sequence):
        return None

    protein_sequences = []
    for frame in reading_frames(sequence):
        # Step through complete codons only: the last start index is
        # len(frame) - 3. Frames are already upper case.
        protein_sequences.append("".join(
            translate_codon(frame[start:start + 3])
            for start in range(0, len(frame) - 2, 3)
        ))
    return protein_sequences


if __name__ == "__main__":
    # Run the doctests only when executed as a script, not on import.
    doctest.testmod()