Dictionaries, sets, and function fitting¶
- Dictionaries, sets, and tuples
- Efficient membership testing and hashing
- Curve fitting
- Example data analysis and curve fitting
Dictionary¶
A dictionary is a collection of (key, value) pairs in which each key maps to a value
A dictionary object is indexed by the key to get the corresponding value
(Since Python 3.7, dictionaries preserve insertion order, but think of them as mappings, not sequences)
d = dict()
d['key'] = 'value'
d[0] = ['x', 'y', 'z']
print(d)
Initializing dicts¶
empty = dict()
alsoempty = {}
Specify key:value pairs within curly braces, or pass keyword arguments to `dict`.
example = dict(a=1, b=2)  # equivalent to {'a': 1, 'b': 2}
example
Add or update values by indexing with a new or existing key
example['a'] = 0
example['z'] = 26
example
Accessing values¶
example['c'] # raises KeyError: keys must exist before access
Use `in` to test for membership
'c' in example
if 'c' not in example:
    example['c'] = 0
example
Methods¶
example.keys()   # view of the keys
example.values() # view of the values
example.items()  # view of the (key, value) pairs
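A common pattern (my sketch, reusing the `example` dict above) is to iterate over keys and values together:
for key, value in example.items():
    print(key, value)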
What is the output of this function call?¶
def count(vals):
    counts = {}
    for x in vals:
        counts[x] += 1
    return counts
count(['a','a','b','a','c','b'])
Fixing the function¶
def count(vals):
    counts = {}
    for x in vals:
        if x not in counts:
            counts[x] = 0
        counts[x] += 1
    return counts
d = count(['a','a','b','a','c','b'])
d
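As an aside (not in the original slides), the standard library offers two equivalent idioms that avoid the explicit membership test: `dict.get` with a default, and `collections.Counter`.
from collections import Counter
def count_get(vals):  # hypothetical name for this variant
    counts = {}
    for x in vals:
        counts[x] = counts.get(x, 0) + 1  # 0 for keys not yet seen
    return counts
count_get(['a','a','b','a','c','b']), Counter(['a','a','b','a','c','b'])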
Sets¶
A set is an unordered collection with no duplicate elements
Basic uses include membership testing and eliminating duplicate entries
Set objects also support mathematical operations like union, intersection, difference, and symmetric difference
stuff = set(['a','b','a','d','x','a','e'])
stuff
Sets are not indexed - use `add` to insert new elements.
stuff.add('y')
`set` operations¶
Efficient membership testing
'y' in stuff
stuff2 = set(['a','b','c'])
print('and', stuff & stuff2)  # intersection
print('or', stuff | stuff2)   # union
print('diff', stuff - stuff2) # difference
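Symmetric difference, mentioned above but not shown, uses the `^` operator: elements in either set but not in both.
print('xor', stuff ^ stuff2)  # symmetric difference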
How many elements are in `s`?¶
s = set([1,2,2,3,3,3,4,4,4,4])
s
What is `c`?¶
a = set([1,2,2,3])
b = set([2,3,3,4])
c = a & b
c
Tuples¶
A `tuple` is an immutable (unchangeable) list
t = tuple([1,2,3])
t
Tuples are initialized the same way as lists, just with parentheses
t = ('x',0,3.0)
l = ['x',0,3.0]
t,l
'%s %d' % ('hello', 3) # second operand of string % operator is tuple
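Tuples also support packing and unpacking, which is how Python functions appear to return multiple values; a quick sketch:
pair = ('hello', 3)  # packing
word, num = pair     # unpacking
word, num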
Immutability of tuples¶
t = ('x', 0, 3.0)
t[2] += 1  # raises TypeError: tuples do not support item assignment
Keys¶
The keys of a dictionary and the elements of a set must be immutable. Examples of immutable types are numbers, strings, tuples (that contain only immutable objects), and frozensets.
example[(1,2)] = 'a'  # works: a tuple of immutable objects is hashable
example[[1,2]] = 'a'  # raises TypeError: lists are mutable, hence unhashable
Dictionaries and sets efficiently store data based on properties of the key (its hash value). If those properties can change, the data structure is broken and the data is no longer where it should be.
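For example (my addition), a `frozenset` can stand in for a set when a set-like key is needed, because it is immutable and therefore hashable:
example[frozenset([1,2])] = 'a'  # works: frozensets are immutable and hashable
# example[set([1,2])] = 'a'      # would raise TypeError: unhashable type: 'set'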
Efficiency¶
Imagine we have two lists, `l1` and `l2`, and we want to do something with the items the two lists have in common. Here we'll just count them.
def listcount(l1, l2):
    count = 0
    for x in l1:
        if x in l2:
            count += 1
    return count

def setcount(l1, l2):
    count = 0
    s1 = set(l1)
    s2 = set(l2)
    for x in s1:
        if x in s2:
            count += 1
    return count
These two functions give the same answer if the lists contain all distinct elements (converting to a set removes duplicates).
Which function is faster?¶
import time
l1 = list(range(40000))
l2 = list(range(1000,10000))
t0 = time.time()
listcount(l1,l2)
t1 = time.time()
setcount(l1,l2)
t2 = time.time()
print('listcount time: ',t1-t0,'\nsetcount time: ',t2-t1)
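For more stable measurements than a single `time.time()` pair, the standard library's `timeit` module repeats the call; a minimal sketch:
import timeit
print('setcount:', timeit.timeit(lambda: setcount(l1, l2), number=10))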
Can you think of another way?¶
t0 = time.time()
len(set(l1) & set(l2))
t3 = time.time()
print("set intersection time:",t3-t0)
Do not do membership testing on (large) lists¶
- Lists preserve the order the programmer chose, so Python must examine every element to determine (non)membership: time proportional to the length of the list.
- Dictionaries and sets store their contents by the hash of the key rather than by position, which makes membership testing efficient (using hashing): roughly constant time on average.
Hashing¶
The keys of sets and dictionaries must be hashable types. Technically, this means they define the methods `__eq__` (or, in Python 2, `__cmp__`) and `__hash__`.
A hash function (`__hash__`) takes an arbitrary object and produces a number. Objects that are equal (according to `__eq__` or `__cmp__`) must have the same hash value.
Why is this important?
Accessing an element of an array in memory (random access...) is as simple as computing a memory address (base of the array plus an offset). This is great if our keys are a dense range of integers (then we have a lookup table).
Hash functions provide a way to quickly index into an array of data even when our keys are arbitrary objects.
Hashing¶
[Figure (from Wikipedia): a hash table in which collisions are resolved through chaining, each bucket holding a list of entries]
With a good hash function and enough buckets, no chain (list) will be more than a few elements long and accessing our data will be constant time (on average). The position of an element in the table is determined by its hash value and the table size.
position = hash(obj) % table_size  # hash(obj) calls obj.__hash__()
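To make the chaining picture concrete, here is a minimal, illustration-only hash table (my sketch, not CPython's implementation) that stores (key, value) pairs in per-bucket lists:
class ChainedHashTable:
    def __init__(self, size=8):
        self.buckets = [[] for _ in range(size)]  # one chain (list) per bucket

    def put(self, key, value):
        chain = self.buckets[hash(key) % len(self.buckets)]
        for i, (k, _) in enumerate(chain):
            if k == key:                # key already present: replace its value
                chain[i] = (key, value)
                return
        chain.append((key, value))      # empty bucket or collision: append to chain

    def get(self, key):
        chain = self.buckets[hash(key) % len(self.buckets)]
        for k, v in chain:
            if k == key:
                return v
        raise KeyError(key)

t = ChainedHashTable()
t.put('cat', 1); t.put('dog', 2)
t.get('cat'), t.get('dog')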
Hash Functions¶
A good hash function of an object has the following properties:
- The hash value is fully determined by the object (deterministic)
- The hash function uses all the object's data (if relevant)
- The hash function is uniform (evenly distributes keys across the range)
The range of the hash function is usually the machine integers ($2^{32}$ or $2^{64}$ possible values).
print(hash(3), hash(1435080909832), hash('cat'), hash((3,'cat')))
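As an illustration (a hypothetical `Point` class, not from the slides), a user-defined type becomes a valid dict/set key by defining consistent `__eq__` and `__hash__`:
class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y
    def __eq__(self, other):
        return (self.x, self.y) == (other.x, other.y)
    def __hash__(self):
        # equal points must hash equally: derive the hash from the same data __eq__ uses
        return hash((self.x, self.y))

d = {Point(1, 2): 'a'}
d[Point(1, 2)]  # 'a': a distinct but equal object finds the same entry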
Takeaway¶
`set`, `frozenset`, and `dict` use hashing to provide extremely efficient membership testing and (for `dict`) value lookup
However, you cannot rely on any particular order of the elements in a set: the order can change as you add and delete (since Python 3.7, `dict` does preserve insertion order)
s = set([3,999])
s
s.add(1000)
print(s)
s.update([1001,1002,1003])
print(s)
`polyfit` takes $x$ values, $y$ values, and the degree of a polynomial, and returns the coefficients of the least squares fit (highest degree first)
import numpy as np
xvals = np.linspace(-1,2,20)
yvals = xvals**3 + np.random.random(20)  # cubic plus uniform noise in [0, 1) at each of the 20 x values
yvals
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(xvals,yvals,'o');
polyfit¶
Reminder: $y = x^3$ + noise
deg1 = np.polyfit(xvals,yvals,1)
deg2 = np.polyfit(xvals,yvals,2)
deg3 = np.polyfit(xvals,yvals,3)
deg1
deg2
deg3
poly1d¶
Construct a polynomial function from coefficients
p1 = np.poly1d(deg1)
p2 = np.poly1d(deg2)
p3 = np.poly1d(deg3)
p1(2),p2(2),p3(2)
p1(0),p2(0),p3(0)
plt.plot(xvals,yvals,'o',xvals,p1(xvals),'-',xvals,p2(xvals),'-',xvals,p3(xvals),'-');
scipy.optimize.curve_fit¶
Optimizes the fit to an arbitrary function. Returns the optimal (least squares) values of the parameters along with a covariance estimate.
Provide a Python function that takes an x value followed by the parameters
from scipy.optimize import curve_fit
def tanh(x, a, b):
    return b*np.tanh(a + x)
popt, pcov = curve_fit(tanh, xvals, yvals)  # optimal parameters and their covariance
popt
plt.plot(xvals, yvals, 'o', xvals, tanh(xvals, *popt)); plt.show()
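The covariance estimate is useful too: by standard practice with `curve_fit`, the square roots of the diagonal of `pcov` (returned above) approximate one-standard-deviation uncertainties on the fitted parameters.
perr = np.sqrt(np.diag(pcov))  # approximate 1-sigma errors on a and b
perr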
!head ../files/aff.min
`kd` contains experimental values while `aff.min` and `aff.score` contain computational predictions of those experimental values. Each line has a name and a value.
How good are the predictions?
How do we want to load and store the data?¶
- The rows of the provided files are not in the same order.
- We only care about points that are in all three files.
- Hint: use a dictionary to associate names with values.
import numpy as np
def makedict(fname):
    retdict = {}
    with open(fname) as f:  # closes the file automatically when done
        for line in f:
            (name, value) = line.split()
            retdict[name] = float(value)
    return retdict
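An equivalent, more compact variant (my sketch, with the hypothetical name `makedict2`) uses a dictionary comprehension:
def makedict2(fname):
    with open(fname) as f:
        return {name: float(value) for name, value in (line.split() for line in f)}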
kdvalues = makedict('../files/kd')
scorevalues = makedict('../files/aff.score')
minvalues = makedict('../files/aff.min')
names = []
kdlist = []
scorelist = []
minlist = []
for name in sorted(kdvalues.keys()):
    if name in scorevalues and name in minvalues:
        names.append(name)
        kdlist.append(kdvalues[name])
        scorelist.append(scorevalues[name])
        minlist.append(minvalues[name])
kds = np.array(kdlist)
scores = np.array(scorelist)
mins = np.array(minlist)
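Echoing the membership-testing discussion earlier, the common names can also be found directly with a set intersection (a sketch; `common` and `kds2` are my names):
common = sorted(set(kdvalues) & set(scorevalues) & set(minvalues))
kds2 = np.array([kdvalues[n] for n in common])
np.array_equal(kds, kds2)  # same values as the loop above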
How do we want to visualize the data?¶
Plot experiment value vs. predicted values (two series, scatterplot).
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(kds,scores,'o',alpha=0.5,label='score')
plt.plot(kds,mins,'o',alpha=0.5,label='min')
plt.legend(numpoints=1)
plt.xlim(0,18)
plt.ylim(0,18)
plt.xlabel('Experiment')
plt.ylabel('Prediction')
plt.gca().set_aspect('equal')
plt.show()
Aside: Visualizing dense 2D distributions¶
`seaborn` extends `matplotlib` to make some hard things easy
import seaborn as sns # package that sits on top of matplotlib
sns.jointplot(x=kds, y=scores);
sns.jointplot(x=kds, y=scores, kind='hex');
sns.jointplot(x=kds, y=scores, kind='kde');
What is the error?¶
Average absolute? Mean squared?
print("Scores absolute average error:", np.mean(np.abs(scores-kds)))
print("Mins absolute average error:", np.mean(np.abs(mins-kds)))
print("Scores Mean squared error:", np.mean((scores-kds)**2))
print("Mins Mean squared error:", np.mean((mins-kds)**2))
plt.hist(np.square(kds-mins),bins=25,range=(0,25),density=True)
plt.show()
ave = np.mean(kds)
print("Average experimental value",ave)
print("Error of predicting the average",np.mean(np.square(kds-ave)))
Do the predictions correlate with the observed values?¶
Compute correlations: `np.corrcoef`, `scipy.stats.pearsonr`, `scipy.stats.spearmanr`
np.corrcoef(kds,scores)
np.corrcoef(kds,mins)
import scipy.stats as stats
stats.pearsonr(kds,scores)
stats.spearmanr(kds,scores)
What is the linear relationship?¶
fit = np.polyfit(kds,scores,1)
fit
line = np.poly1d(fit) #converts coefficients into function
line(3)
xpoints = np.linspace(0,18,100) #make 100 xcoords
plt.plot(kds,scores,'o',alpha=0.5,label='score')
plt.plot(kds,mins,'o',alpha=0.5,label='min')
plt.xlim(0,18)
plt.ylim(0,18)
plt.xlabel('Experiment')
plt.ylabel('Prediction')
plt.gca().set_aspect('equal')
plt.plot(xpoints,xpoints,'k')
plt.plot(xpoints,line(xpoints),label="fit",linewidth=2)
plt.legend(loc='lower right')
plt.show()
What happens if we rescale the predictions?¶
Fit a line mapping the predictions onto the experimental scale, then apply it to the predicted values
f2 = np.polyfit(scores,kds,1)
print("Fit:",f2)
fscores = scores*f2[0] + f2[1]  # equivalent to np.poly1d(f2)(scores)
print("Scores Mean squared error:",np.mean(np.square(scores-kds)))
print("Fit Scores Mean squared error:",np.mean(np.square(fscores-kds)))
plt.plot(kds,scores,'o',alpha=0.5,label='score')
plt.plot(kds,fscores,'o',alpha=0.5,label='fit')
plt.xlim(0,18)
plt.ylim(0,18)
plt.xlabel('Experiment')
plt.ylabel('Prediction')
plt.gca().set_aspect('equal')
plt.plot(xpoints,xpoints)
plt.legend(loc='lower right')
plt.show()
stats.pearsonr(kds,fscores)
For next time¶
Systems biology modeling (guest lecture by Prof. James Faeder)