Solutions exercices 30 septembre

In [1]:
import numpy as np # import la librairie numpy (calcul scientifique)
import matplotlib.pyplot as plt # librairie pour afficher les graphs.
%matplotlib inline  

1) Produit scalaire

In [2]:
def ps_mat(x,y):
    """Scalar (dot) product of two equal-length 1-D vectors, computed with numpy.

    Accepts numpy arrays or plain sequences; returns a scalar.
    """
    # Elementwise product followed by a reduction — equivalent to np.dot
    # for 1-D inputs.
    return np.multiply(x, y).sum()
In [3]:
def ps_iter(x,y):
    """Scalar (dot) product computed with an explicit Python loop.

    Slow reference implementation, kept for timing comparison with ps_mat.
    """
    total = 0
    # Accumulate x[i] * y[i] term by term.
    for i in range(len(x)):
        total += x[i] * y[i]
    return total
In [4]:
x = np.arange(10)  # test vectors: integers 0..9
y = np.arange(10)
In [5]:
ps_mat(x,y)
Out[5]:
285
In [6]:
ps_iter(x,y)
Out[6]:
285
In [7]:
%timeit ps_mat(np.arange(123456),np.arange(123456))
1000 loops, best of 3: 773 µs per loop
In [8]:
%timeit ps_iter(np.arange(123456),np.arange(123456))
10 loops, best of 3: 48.3 ms per loop

2) Monte Carlo

In [9]:
def MonteCarlo(N=1000, scatter=True):
    """Estimate the area of the quarter unit disk by Monte Carlo sampling.

    Draws N uniform points in the unit square and returns the fraction that
    falls strictly inside the unit circle (an estimator of pi/4).

    Parameters
    ----------
    N : int or float, default 1000
        Number of points to draw. Floats are truncated to int so callers
        may pass values coming from np.linspace (np.random.rand requires
        an integer size).
    scatter : bool, default True
        If True, scatter-plot the points, red inside the circle and blue
        outside.

    Returns
    -------
    float
        Fraction of points with x**2 + y**2 < 1.
    """
    N = int(N)  # np.random.rand rejects non-integer sizes

    x = np.random.rand(N)
    y = np.random.rand(N)

    z = x**2 + y**2
    inside = z < 1
    # Vectorized count — avoids the slow (z<1).tolist().count(True) round-trip.
    n1 = np.count_nonzero(inside)

    if scatter:
        plt.figure()
        plt.scatter(x[inside], y[inside], c='red')
        plt.scatter(x[~inside], y[~inside], c='blue')

    return n1 / N
In [10]:
MonteCarlo(1000)
Out[10]:
0.787
In [11]:
# Sample counts must be integers: np.linspace yields floats, which
# np.random.rand rejects, so cast before the sweep.
list_N = np.linspace(50, 100000, 1000).astype(int)
p = []
for N in list_N:
    # Quarter-disk area times 4 gives an estimate of pi.
    p.append(MonteCarlo(N, scatter=False) * 4)

plt.plot(list_N, p)
Out[11]:
[<matplotlib.lines.Line2D at 0x7f31d341c128>]

3) Regression

In [23]:
from sklearn import tree
In [157]:
# Create a random dataset
# Create a random dataset — fixed seed for reproducibility.
# NOTE: the exact draw order below matters; later cells depend on these values.
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)  # 80 samples in [0, 5), sorted for plotting
y = np.sin(X).ravel()  # noiseless sine target, flattened to shape (80,)
y[::5] += 3 * (0.5 - rng.rand(16))  # add noise to every 5th sample (80/5 = 16 points)
In [158]:
list_score_test = []
list_score_train = []

# The test grid does not depend on max_depth: build it once, outside the loop.
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_test = np.sin(X_test)  # noiseless ground truth on the grid

list_max_depth = np.arange(1, 10)
for max_depth in list_max_depth:

    # Fit regression model on the full (noisy) dataset
    clf_1 = tree.DecisionTreeRegressor(max_depth=max_depth)
    clf_1.fit(X, y)
    score_train = clf_1.score(X, y)  # R^2 on the training data
    list_score_train.append(score_train)

    # Predict on the grid and score against the noiseless sine
    y_1 = clf_1.predict(X_test)
    score_test = clf_1.score(X_test, y_test)
    list_score_test.append(score_test)

    # Plot the results
    plt.figure()
    plt.scatter(X, y, c="black", label="data")
    plt.plot(X_test, y_1, c="blue", label="max_depth=" + str(max_depth), linewidth=2)
    plt.xlabel("data")
    plt.ylabel("target")
    plt.title("Decision Tree Regression. score train = " + str(np.round(score_train, 2)) +
              " score test = " + str(np.round(score_test, 2)))
    plt.legend()
    plt.show()
In [159]:
# Overfitting diagnostic: the train score keeps rising with depth while
# the test score peaks at a small depth and then degrades.
plt.plot(list_max_depth, list_score_train, label = 'train set')
plt.plot(list_max_depth, list_score_test, label = 'test set')
plt.xlabel('max_depth')
plt.ylabel('score')
plt.legend()
Out[159]:
<matplotlib.legend.Legend at 0x7fb1d31a8588>

Create a validation set

In [163]:
# Reproducible random split: a dedicated, seeded generator keeps the
# train/validation partition stable across notebook re-runs (the original
# unseeded np.random.shuffle gave a different split every run).
rng_split = np.random.RandomState(0)
shuffled_indices = rng_split.permutation(X.shape[0])

n_valid = 15  # size of the held-out validation set

X_valid = X[shuffled_indices[:n_valid]]
y_valid = y[shuffled_indices[:n_valid]]

X_train = X[shuffled_indices[n_valid:]]
y_train = y[shuffled_indices[n_valid:]]

plt.scatter(X_train, y_train, c='black', label='train set')
plt.scatter(X_valid, y_valid, c='red', s=40, label='valid set')
plt.legend()
Out[163]:
<matplotlib.legend.Legend at 0x7fb1d31daba8>
In [164]:
list_score_test = []
list_score_train = []
list_score_valid = []

# The test grid does not depend on max_depth: build it once, outside the loop.
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_test = np.sin(X_test)  # noiseless ground truth on the grid

list_max_depth = np.arange(1, 10)
for max_depth in list_max_depth:

    # Fit regression model on the training subset only
    clf_1 = tree.DecisionTreeRegressor(max_depth=max_depth)
    clf_1.fit(X_train, y_train)
    score_train = clf_1.score(X_train, y_train)
    list_score_train.append(score_train)

    # Score on the held-out validation set (used for model selection)
    score_valid = clf_1.score(X_valid, y_valid)
    list_score_valid.append(score_valid)

    # Predict on the grid and score against the noiseless sine
    y_1 = clf_1.predict(X_test)
    score_test = clf_1.score(X_test, y_test)
    list_score_test.append(score_test)

    # Plot the results
    plt.figure()
    plt.scatter(X, y, c="black", label="train set")
    plt.scatter(X_valid, y_valid, c='red', s=40, label='valid set')
    plt.plot(X_test, y_1, c="blue", label="max_depth=" + str(max_depth), linewidth=2)
    plt.xlabel("data")
    plt.ylabel("target")
    plt.title("Decision Tree Regression. score train = " + str(np.round(score_train, 2)) +
              " score test = " + str(np.round(score_test, 2)))
    plt.legend()
    plt.show()
In [165]:
# Compare the three curves: the validation score should track the test
# score, justifying model selection on the validation set.
plt.plot(list_max_depth, list_score_train, label = 'train set')
plt.plot(list_max_depth, list_score_test, label = 'test set')
plt.plot(list_max_depth, list_score_valid, label = 'valid set')
plt.xlabel('max_depth')
plt.ylabel('score')
plt.legend()
Out[165]:
<matplotlib.legend.Legend at 0x7fb1d35baa90>
In [166]:
# Read the best depth directly from list_max_depth instead of `argmax + 1`,
# which silently assumed depths start at 1 with step 1 (argmax returns a
# position, not a depth value).
max_depth_train = list_max_depth[np.argmax(list_score_train)]
max_depth_test = list_max_depth[np.argmax(list_score_test)]
max_depth_validation = list_max_depth[np.argmax(list_score_valid)]

print(max_depth_train)
print(max_depth_test)
print(max_depth_validation)
9
3
2

4) Classification

In [13]:
# Make data: two overlapping diagonal clouds of 10 points each.
# Fixed seed for reproducibility; the exact draw order below matters.
rng = np.random.RandomState(13)
d1 = np.asarray(((np.arange(10) + rng.rand(10),np.arange(10)+ rng.rand(10)))).T  # class 0, shape (10, 2)
d2 = np.asarray(((np.arange(3,13)+ rng.rand(10),np.arange(10)+ rng.rand(10)))).T  # class 1, shifted by 3 along x
X = np.vstack((d1,d2))  # (20, 2) feature matrix
y = [0] * d1.shape[0] + [1] * d2.shape[0]  # class labels
plt.scatter(X[:,0],X[:,1], c = y, s = 50)
Out[13]:
<matplotlib.collections.PathCollection at 0x7fb1db6015f8>
In [14]:
# Helper function — no need to dwell on its internals for now.
def plot_boundary(clf, X, y):
    """Plot the classifier's decision regions over a grid covering X.

    Parameters
    ----------
    clf : fitted classifier exposing .predict and .score
    X : array of shape (n_samples, 2) — 2-D features (indexed X[:, 0], X[:, 1])
    y : array-like of labels, used to color the scatter overlay
    """
    plt.figure()
    plot_step = 0.02  # grid resolution
    # Bounding box of the data, padded by 1 on each side
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    # Classify every grid point, then reshape predictions back to the grid
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.scatter(X[:,0],X[:,1], c = y)
    plt.title('score = ' + str(clf.score(X,y)))
In [15]:
# Increase max_depth until the training data is perfectly classified.
for depth in np.arange(1, 10):
    clf = tree.DecisionTreeClassifier(max_depth=depth).fit(X, y)
    plot_boundary(clf, X, y)

    # Stop at the first depth that separates the training set exactly.
    if clf.score(X, y) == 1.:
        break
In [ ]:
 
In [18]:
# 5) Multi class
In [19]:
# Make data: three overlapping diagonal clouds of 10 points each.
# Same seed (13) as the binary dataset, so d1 repeats the earlier class-0 cloud.
rng = np.random.RandomState(13)
d1 = np.asarray(((np.arange(10) + rng.rand(10),np.arange(10)+ rng.rand(10)))).T  # class 0
d2 = np.asarray(((np.arange(7,17)+ rng.rand(10),np.arange(10)+ rng.rand(10)))).T  # class 1, shifted by 7 along x
d3 = np.asarray(((np.arange(3,13)+ rng.rand(10),np.arange(10)+ rng.rand(10)))).T  # class 2, shifted by 3 along x
X = np.vstack((d1,d2,d3))  # (30, 2) feature matrix
y = [0] * d1.shape[0] + [1] * d2.shape[0] + [2] * d3.shape[0]  # class labels
plt.scatter(X[:,0],X[:,1], c = y, s = 50)
Out[19]:
<matplotlib.collections.PathCollection at 0x7f4b70116208>
In [20]:
# Increase max_depth until the training data is perfectly classified.
for max_depth in np.arange(1, 10):
    clf = tree.DecisionTreeClassifier(max_depth=max_depth)
    clf = clf.fit(X, y)
    # plot_boundary already sets an identical 'score = ...' title, so the
    # redundant plt.title call (and the dead commented-out print) is removed.
    plot_boundary(clf, X, y)

    if clf.score(X, y) == 1.:
        break
In [ ]:
 
In [ ]:
 

Autres données

In [19]:
from sklearn.datasets import make_moons
# 3000 points on two interleaved half-moons with Gaussian noise; fixed random_state.
X,y = make_moons(n_samples=3000, noise=.1, random_state=12)
/home/thomas/anaconda3/lib/python3.4/site-packages/sklearn/datasets/samples_generator.py:612: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = np.hstack([np.zeros(n_samples_in, dtype=np.intp),
In [20]:
plt.scatter(X[:,0],X[:,1], c = y, s=90)  # visualize the two moons, colored by class
Out[20]:
<matplotlib.collections.PathCollection at 0x7fb1da534f60>
In [21]:
# Increase max_depth until the training data is perfectly classified.
for max_depth in np.arange(1, 10):
    clf = tree.DecisionTreeClassifier(max_depth=max_depth)
    clf = clf.fit(X, y)
    # plot_boundary already sets an identical 'score = ...' title, so the
    # redundant plt.title call (and the dead commented-out print) is removed.
    plot_boundary(clf, X, y)

    if clf.score(X, y) == 1.:
        break