from IPython.core.display import HTML

def _set_css_style(css_file_path):
    """
    Read a custom CSS file and wrap it for loading into Jupyter.

    Parameters
    ----------
    css_file_path : str or path-like
        Path to the CSS file to read.

    Returns
    -------
    IPython.core.display.HTML
        An HTML object whose markup is the file's contents enclosed in a
        ``<style>`` element.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    # The context manager closes the file handle as soon as the contents
    # are read instead of leaving it open until garbage collection.
    with open(css_file_path, "r") as css_file:
        styles = css_file.read()
    return HTML('<style>%s</style>' % styles)

# Wrap the contents of 'rise.css' in a <style> element. The call returns an
# HTML object; presumably this is the last expression of a notebook cell so
# that Jupyter renders it -- confirm when running outside a notebook.
_set_css_style('rise.css')

# List comprehension with a filter clause: the even numbers in 0..9.
[x for x in range(10) if x % 2 == 0]

# Building the list of squares with an explicit loop and append...
squares = []
for x in range(10):
    squares.append(x**2)
squares

# ...and the same list built with a single list comprehension.
squares = [x**2 for x in range(10)]
squares

list(enumerate('ABCD')) # enumerate returns tuples of index,value

# Dict comprehension: maps index -> letter, skipping the letters 'C' and 'B'.
{key: val for key, val in enumerate('ABCD') if val not in 'CB'}

# Set comprehension: repeated letters collapse to one entry and 'C'/'B' are
# filtered out.
{v for v in 'ABCDABCD' if v not in 'CB'}

# Parse a tab-delimited text file into a list of field lists, skipping every
# line that starts with '#'. Opening the file in a `with` block guarantees
# the handle is closed once the comprehension has consumed all lines.
with open('file') as infile:
    result = [line.strip().split('\t') for line in infile if not line.startswith('#')]

from Bio import AlignIO
# Read one alignment from the file, parsing it as Clustal format.
a = AlignIO.read('../files/hydra179.aln','clustal')

# Number of rows in the alignment.
len(a)

# Length of the first row next to the alignment length reported by the
# alignment object itself.
len(a[0]),a.get_alignment_length()

# Bare expression: in a notebook cell this displays the alignment object.
a

from Bio import Phylo
# Read a single tree from the Newick-format file.
tree = Phylo.read('../files/hydra179.dnd','newick') #must specify format
tree

# Plain-text rendering of the tree.
Phylo.draw_ascii(tree)

# IPython magic (not plain Python): asks the notebook to render matplotlib
# figures inline.
%matplotlib inline
Phylo.draw(tree)

# Same plot, but label_func returns None for every input it is given.
# NOTE(review): per the Phylo.draw documentation this should suppress the
# labels -- confirm against the installed Biopython version.
Phylo.draw(tree,label_func=lambda x: None)

from Bio import motifs  # lower case for some reason
# Build a motif from four equal-length DNA strings.
m = motifs.create(["TACAA","CATGC","TACTA","CCCAA"])

# Counts attribute of the motif built above.
m.counts

m.consensus

# Write a sequence logo for the motif to 'logo.png'.
# NOTE(review): Biopython's weblogo() is documented as calling the WebLogo
# web service, so this presumably needs network access -- confirm.
m.weblogo('logo.png', alphabet='alphabet_dna', stack_width='large')

from IPython.display import Image
# Display the image file written by the weblogo() call above.
Image(filename='./logo.png')

# Unlike other parts of Biopython, motifs.read needs an open file handle and
# cannot just be given a filename. The `with` block closes the handle as soon
# as the motif has been parsed; `arnt` remains usable afterwards.
with open('../files/MA0004.1.sites') as f:
    arnt = motifs.read(f,'sites') # JASPAR sites

# Bare expressions: in a notebook these display the motif and its consensus.
arnt

arnt.consensus

# Counts matrix of the motif read from the sites file.
print(arnt.counts)

# NOTE(review): presumably the individual site sequences the motif was built
# from; this attribute path depends on the Biopython version -- confirm.
arnt.alignment.sequences

# Counts normalized without pseudocounts...
print(arnt.counts.normalize())

# ...and with a pseudocount of 0.8 supplied to normalize().
print(arnt.counts.normalize(pseudocounts=0.8))

# Keep the pseudocount-normalized matrix as `pwm` and derive the log-odds
# matrix `pssm` from it.
pwm = arnt.counts.normalize(pseudocounts=0.8)
pssm = pwm.log_odds()
print(pssm)

from Bio import SeqIO
from Bio.Seq import Seq
# Read the single FASTA record that will be searched for motif hits.
largeseq = SeqIO.read('../files/bnip3.fasta','fasta') # load with same alphabet as motif
# Short literal sequence; it is not used by any of the cells visible here.
smallseq = Seq('AAACCCACGTGACTATATA')

# Rebuild the pseudocount-normalized matrix and its log-odds form so this
# cell does not depend on an earlier one having been run.
pwm = arnt.counts.normalize(pseudocounts=0.8)
pssm = pwm.log_odds()

# search() yields (position, score) pairs; keep only the positions found
# with the default threshold.
positions = [hit_pos for hit_pos, _hit_score in pssm.search(largeseq.seq)]
len(positions)

# Same search with threshold=4, this time keeping the full
# (position, score) pairs.
results = list(pssm.search(largeseq.seq, threshold=4))
len(results)

results[0]

results[:2]

# Position of the second hit returned by the thresholded search.
pos = results[1][0] # -13823
# Slice out len(arnt) characters starting at pos.
# NOTE(review): if pos + len(arnt) were exactly 0 this slice would be empty;
# not the case for the value noted above.
hit = largeseq.seq[pos:pos+len(arnt)]  # negative indices can still be used to retrieve matched subsequence

# Print pos next to len(largeseq)+pos, i.e. the same offset counted from the
# start of the sequence instead of from its end.
print(pos,len(largeseq)+pos)
# Print the extracted subsequence together with its reverse complement.
print(hit, hit.reverse_complement())

print(arnt.counts)

Biopython and sequence analysis continued

A bit more python... list comprehensions

Two ways to do the same thing

Other comprehensions

Should you use comprehensions?

Back to Biopython...

Phylogenetic Trees

Displaying trees

Displaying trees

Motifs

Motif logos

Motif logos

Reading motifs

Reading motifs

Scoring matrices

PSSM

Searching for motifs

Searching for motifs

Searching for motifs

Some more marine biology

Your Herculean Task

For next time