Source code for bioin.bioin

"""
bioin.py
Study notes for beginner-level bioinformatics.

Handles the primary functions
"""
# import sys
# lines = sys.stdin.read().splitlines()


def canvas(with_attribution=True):
    """Return the sample quote, optionally followed by its attribution.

    Placeholder function showing an example docstring (NumPy format).
    Replace this function and its docstring for your own project.

    Parameters
    ----------
    with_attribution : bool, Optional, default: True
        Whether to append the attribution line to the quote.

    Returns
    -------
    quote : str
        The quote text; when ``with_attribution`` is truthy the attribution
        follows on a new, tab-indented line.
    """
    parts = ["The code is but a canvas to our imagination."]
    if with_attribution:
        parts.append("\n\t- Adapted from Henry David Thoreau")
    return "".join(parts)
def pattern_count(pattern, text):
    """Count how many times ``pattern`` occurs in ``text``, overlaps included.

    Args:
        pattern (str): the substring to look for.
        text (str): the string to search in.

    Returns:
        int, the number of start positions in text at which pattern matches.

    Examples:
        Overlapping occurrences are counted separately.

        >>> pattern = "GCG"
        >>> text = "GCGCG"
        >>> pattern_count(pattern, text)
        2
    """
    width = len(pattern)
    # Last index at which a window of `width` characters still fits in text.
    last_start = len(text) - width
    return sum(
        1
        for start in range(last_start + 1)
        if text[start:start + width] == pattern
    )
# print(pattern_count(lines[1], lines[0]))

# The Frequent Words Problem
# We say that Pattern is a most frequent k-mer in Text if it maximizes
# PatternCount(Pattern, Text) among all k-mers.
# You can verify that "ACTAT" is a most frequent 5-mer for
# Text = "ACAACTATGCATACTATCGGGAACTATCCT",
# and "ATA" is a most frequent 3-mer for Text = "CGATATATCCATAG".
def frequency_map(text, k):
    """Count how many times each k-mer occurs in ``text``.

    The text is scanned once: each window of length k increments the
    counter of the k-mer it contains, rather than re-scanning the whole
    text for every window. Keys are inserted in order of first occurrence.

    Args:
        text (str): text.
        k (int): length of the substring (i.e. kmers).

    Returns:
        Dictionary, mapping every k-mer that appears in text to the number
        of (possibly overlapping) times it appears. Empty when text is
        shorter than k.

    Examples:
        Compute the frequency map of a given string (i.e. text) and
        integer (i.e. k).

        >>> text = "CGATATATCCATAG"
        >>> k = 3
        >>> kmers_count_map = frequency_map(text, k)
        >>> kmers_count_map
        {'CGA': 1, 'GAT': 1, 'ATA': 3, 'TAT': 2, 'ATC': 1, 'TCC': 1, 'CCA': 1, 'CAT': 1, 'TAG': 1}
    """
    freq = {}
    # range() is empty when len(text) < k, so short inputs yield {}.
    for start in range(len(text) - k + 1):
        kmer = text[start:start + k]
        freq[kmer] = freq.get(kmer, 0) + 1
    return freq
def frequent_words(text, k):
    """Find all the most frequent k-mers in text.

    Depends on function frequency_map: the k-mers whose count equals the
    maximum count in the frequency map are returned, in the order the map
    yields them.

    Args:
        text (str): text.
        k (int): length of the substring (i.e. kmers).

    Returns:
        List, a list that contains all the most frequent k-mers in text.
        Empty when the frequency map is empty (e.g. text shorter than k).

    Examples:
        Compare the frequency of all the k-mers of a given string
        (i.e. text) and integer (i.e. k), then return a list of the most
        frequent k-mers.

        >>> text = "ACGTTGCATGTCGCATGATGCATGAGAGCT"
        >>> k = 4
        >>> kmers_list = frequent_words(text, k)
        >>> kmers_list
        ['GCAT', 'CATG']
    """
    freq = frequency_map(text, k)
    if not freq:
        # max() raises ValueError on an empty sequence; with no k-mers at
        # all there is simply nothing to report.
        return []
    highest = max(freq.values())
    return [kmer for kmer, count in freq.items() if count == highest]
def reverse(pattern):
    """Return ``pattern`` with its characters in reverse order.

    For example, reversing the string 'ACGT' gives 'TGCA'.

    Args:
        pattern (str): a DNA string (i.e. pattern).

    Returns:
        String, the given pattern read from its last character to its first.

    Examples:
        >>> pattern = 'AAAACCCGGT'
        >>> reverse(pattern)
        'TGGCCCAAAA'
    """
    # A slice with step -1 walks the sequence from the end to the start.
    return pattern[::-1]
def complement(pattern):
    """Return the complementary strand of ``pattern``, base by base.

    Each nucleotide is swapped for its partner (A<->T, C<->G); the order of
    the positions is left unchanged.

    Args:
        pattern (str): a DNA string pattern made of the letters A, C, G, T.

    Returns:
        String, the DNA string complementary to the given pattern.

    Raises:
        KeyError: if pattern contains a character other than the uppercase
            letters A, C, G and T.

    Examples:
        >>> pattern = 'AAAACCCGGT'
        >>> complement(pattern)
        'TTTTGGGCCA'
    """
    pairing = {"A": "T", "C": "G", "G": "C", "T": "A"}
    # Direct indexing (not .get) so an unknown base fails with KeyError.
    return "".join(map(pairing.__getitem__, pattern))
def reverse_complement(pattern):
    """Return the reverse complement of a DNA string.

    The pattern is first reversed and every base of the reversed string is
    then replaced by its complement.

    Args:
        pattern (str): a DNA string pattern.

    Returns:
        String, the reverse complement string of the given pattern string.

    Examples:
        >>> pattern = 'AAAACCCGGT'
        >>> reverse_complement(pattern)
        'ACCGGGTTTT'
    """
    # Reverse the letters first, then complement each one.
    return complement(reverse(pattern))
if __name__ == "__main__":
    # Script entry point: when this file is run directly (not imported),
    # print the sample quote.
    print(canvas())