# Solutions exercices 30 septembre¶

In [1]:
import numpy as np # import la librairie numpy (calcul scientifique)
import matplotlib.pyplot as plt # librairie pour afficher les graphs.
%matplotlib inline


## 1) Produit scalaire¶

In [2]:
def ps_mat(x, y):
    """Dot product of two 1-D arrays, computed with vectorized numpy ops.

    Equivalent to np.dot(x, y) for 1-D inputs.
    """
    return np.sum(x * y)

In [3]:
def ps_iter(x, y):
    """Dot product computed element by element in pure Python.

    Deliberately non-vectorized, for timing comparison against ps_mat.
    """
    return sum(x[i] * y[i] for i in range(len(x)))

In [4]:
# Two identical 0..9 integer vectors for the dot-product demos
x = np.arange(10)
y = x.copy()

In [5]:
ps_mat(x,y)

Out[5]:
285
In [6]:
ps_iter(x,y)

Out[6]:
285
In [7]:
%timeit ps_mat(np.arange(123456),np.arange(123456))

1000 loops, best of 3: 773 µs per loop

In [8]:
%timeit ps_iter(np.arange(123456),np.arange(123456))

10 loops, best of 3: 48.3 ms per loop


## 2) Monte Carlo¶

In [9]:
def MonteCarlo(N=1000, scatter=True):
    """Estimate pi/4 by uniform sampling in the unit square.

    Draws N points uniformly in [0, 1)^2 and returns the fraction falling
    inside the quarter disc x**2 + y**2 < 1 (which converges to pi/4).

    Parameters
    ----------
    N : int or float, default 1000
        Number of sample points. Floats (e.g. values from np.linspace)
        are truncated to int — np.random.rand requires an integer count.
    scatter : bool, default True
        If True, scatter-plot the points: red inside the disc, blue outside.

    Returns
    -------
    float
        Fraction of points inside the quarter disc, in [0, 1].
    """
    N = int(N)  # tolerate float N (callers iterate over np.linspace)
    x = np.random.rand(N)
    y = np.random.rand(N)

    inside = x**2 + y**2 < 1
    # count_nonzero avoids the O(n) list round-trip of tolist().count(True)
    n1 = np.count_nonzero(inside)

    if scatter:
        plt.figure()
        plt.scatter(x[inside], y[inside], c='red')
        plt.scatter(x[~inside], y[~inside], c='blue')

    return n1 / N

In [10]:
MonteCarlo(1000)

Out[10]:
0.787
In [11]:
# Convergence of the estimator: 4 * (fraction inside) -> pi as N grows.
# linspace yields floats; cast to int because np.random.rand(N) needs an
# integer sample count (passing floats is an error in modern numpy).
list_N = np.linspace(50, 100000, 1000).astype(int)
p = []
for N in list_N:
    p.append(MonteCarlo(N, scatter=False) * 4)

plt.plot(list_N, p)

Out[11]:
[<matplotlib.lines.Line2D at 0x7f31d341c128>]

## 3) Regression¶

In [23]:
from sklearn import tree

In [157]:
# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

In [158]:
list_score_test = []
list_score_train = []

# The dense evaluation grid and its noise-free targets are loop-invariant:
# build them once instead of on every iteration.
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_test = np.sin(X_test)

list_max_depth = np.arange(1, 10)
for max_depth in list_max_depth:

    # Fit regression model on the full (noisy) dataset
    clf_1 = tree.DecisionTreeRegressor(max_depth=max_depth)
    clf_1.fit(X, y)
    score_train = clf_1.score(X, y)
    list_score_train.append(score_train)

    # Predict on the grid and score against the clean sine curve
    y_1 = clf_1.predict(X_test)
    score_test = clf_1.score(X_test, y_test)
    list_score_test.append(score_test)

    # Plot the fitted step function over the data
    plt.figure()
    plt.scatter(X, y, c="black", label="data")
    plt.plot(X_test, y_1, c="blue", label="max_depth="+str(max_depth), linewidth=2)
    plt.xlabel("data")
    plt.ylabel("target")
    plt.title("Decision Tree Regression. score train = " + str(np.round(score_train,2)) +
              " score test = " + str(np.round(score_test,2)))
    plt.legend()
    plt.show()

In [159]:
# Train score rises monotonically with depth (the tree memorizes the noise)
# while the test score peaks at a small depth -- classic overfitting picture.
plt.plot(list_max_depth, list_score_train, label = 'train set')
plt.plot(list_max_depth, list_score_test, label = 'test set')
plt.xlabel('max_depth')
plt.ylabel('score')
plt.legend()

Out[159]:
<matplotlib.legend.Legend at 0x7fb1d31a8588>

### Create a validation set¶

In [163]:
# Hold out a random validation subset of the samples.
# NOTE(review): no seed is set here, so the split differs between runs.
shuffled_idx = np.arange(X.shape[0])
np.random.shuffle(shuffled_idx)

n_valid = 15

# First n_valid shuffled indices -> validation set, the rest -> training set
valid_idx = shuffled_idx[:n_valid]
train_idx = shuffled_idx[n_valid:]

X_valid = X[valid_idx]
y_valid = y[valid_idx]

X_train = X[train_idx]
y_train = y[train_idx]

plt.scatter(X_train, y_train, c='black', label ='train set')
plt.scatter(X_valid, y_valid, c='red', s=40, label = 'valid set')
plt.legend()

Out[163]:
<matplotlib.legend.Legend at 0x7fb1d31daba8>
In [164]:
list_score_test = []
list_score_train = []
list_score_valid = []

# The dense evaluation grid and its noise-free targets are loop-invariant:
# build them once instead of on every iteration.
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_test = np.sin(X_test)

list_max_depth = np.arange(1, 10)
for max_depth in list_max_depth:

    # Fit regression model on the training split only
    clf_1 = tree.DecisionTreeRegressor(max_depth=max_depth)
    clf_1.fit(X_train, y_train)
    score_train = clf_1.score(X_train, y_train)
    list_score_train.append(score_train)

    # Score on the held-out validation split
    score_valid = clf_1.score(X_valid, y_valid)
    list_score_valid.append(score_valid)

    # Predict on the grid and score against the clean sine curve
    y_1 = clf_1.predict(X_test)
    score_test = clf_1.score(X_test, y_test)
    list_score_test.append(score_test)

    # Plot the fitted step function over train and validation points
    plt.figure()
    plt.scatter(X, y, c="black", label="train set")
    plt.scatter(X_valid, y_valid, c='red', s=40, label = 'valid set')
    plt.plot(X_test, y_1, c="blue", label="max_depth="+str(max_depth), linewidth=2)
    plt.xlabel("data")
    plt.ylabel("target")
    plt.title("Decision Tree Regression. score train = " + str(np.round(score_train,2)) +
              " score test = " + str(np.round(score_test,2)))
    plt.legend()
    plt.show()

In [165]:
# Compare all three curves: the validation curve is the one to use for
# model selection -- the test (clean sine) curve would not exist in practice.
plt.plot(list_max_depth, list_score_train, label = 'train set')
plt.plot(list_max_depth, list_score_test, label = 'test set')
plt.plot(list_max_depth, list_score_valid, label = 'valid set')
plt.xlabel('max_depth')
plt.ylabel('score')
plt.legend()

Out[165]:
<matplotlib.legend.Legend at 0x7fb1d35baa90>
In [166]:
# Best depth according to each score list. np.argmax returns a *position*;
# map it back through list_max_depth instead of hard-coding "+ 1", so this
# stays correct if the depth grid ever changes (e.g. np.arange(2, 20, 2)).
max_depth_train = list_max_depth[np.argmax(list_score_train)]
max_depth_test = list_max_depth[np.argmax(list_score_test)]
max_depth_validation = list_max_depth[np.argmax(list_score_valid)]

print(max_depth_train)
print(max_depth_test)
print(max_depth_validation)

9
3
2


## 4) Classification¶

In [13]:
# Make data
rng = np.random.RandomState(13)
d1 = np.asarray(((np.arange(10) + rng.rand(10),np.arange(10)+ rng.rand(10)))).T
d2 = np.asarray(((np.arange(3,13)+ rng.rand(10),np.arange(10)+ rng.rand(10)))).T
X = np.vstack((d1,d2))
y = [0] * d1.shape[0] + [1] * d2.shape[0]
plt.scatter(X[:,0],X[:,1], c = y, s = 50)

Out[13]:
<matplotlib.collections.PathCollection at 0x7fb1db6015f8>
In [14]:
# Don't dwell on this function for now.
def plot_boundary(clf, X, y):
    """Plot the decision regions of a fitted 2-D classifier.

    Evaluates clf.predict on a dense grid covering the data (padded by 1 on
    each side), draws the predicted class of each grid cell as filled
    contours, overlays the data points, and puts the training score in the
    title.

    Parameters: clf is a fitted classifier with predict/score; X is an
    (n, 2) feature array; y are the labels used to color the points.
    """
    plt.figure()
    plot_step = 0.02  # grid resolution
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    # Predict each grid cell, then reshape back to the grid for contourf
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.scatter(X[:,0],X[:,1], c = y)
    plt.title('score = ' + str(clf.score(X,y)))

In [15]:
# fait varier max_depth
for max_depth in np.arange(1,10):
clf = tree.DecisionTreeClassifier(max_depth=max_depth)
clf = clf.fit(X, y)
plot_boundary(clf, X,y)

if clf.score(X,y) == 1.:
break

In [ ]:


In [18]:
# 5) Multi class

In [19]:
# Make data
rng = np.random.RandomState(13)
d1 = np.asarray(((np.arange(10) + rng.rand(10),np.arange(10)+ rng.rand(10)))).T
d2 = np.asarray(((np.arange(7,17)+ rng.rand(10),np.arange(10)+ rng.rand(10)))).T
d3 = np.asarray(((np.arange(3,13)+ rng.rand(10),np.arange(10)+ rng.rand(10)))).T
X = np.vstack((d1,d2,d3))
y = [0] * d1.shape[0] + [1] * d2.shape[0] + [2] * d3.shape[0]
plt.scatter(X[:,0],X[:,1], c = y, s = 50)

Out[19]:
<matplotlib.collections.PathCollection at 0x7f4b70116208>
In [20]:
# fait varier max_depth
for max_depth in np.arange(1,10):
clf = tree.DecisionTreeClassifier(max_depth=max_depth)
clf = clf.fit(X, y)
plot_boundary(clf, X,y)
plt.title('score = ' + str(clf.score(X,y)))
#print(clf.score(X,y))
if clf.score(X,y) == 1.:
break

In [ ]:


In [ ]:



# Autres données¶

In [19]:
from sklearn.datasets import make_moons
# Two interleaved half-moons, 3000 samples; fixed random_state for reproducibility
X,y = make_moons(n_samples=3000, noise=.1, random_state=12)

/home/thomas/anaconda3/lib/python3.4/site-packages/sklearn/datasets/samples_generator.py:612: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
y = np.hstack([np.zeros(n_samples_in, dtype=np.intp),

In [20]:
# Visualize the two moon classes
plt.scatter(X[:,0],X[:,1], c = y, s=90)

Out[20]:
<matplotlib.collections.PathCollection at 0x7fb1da534f60>
In [21]:
# fait varier max_depth
for max_depth in np.arange(1,10):
clf = tree.DecisionTreeClassifier(max_depth=max_depth)
clf = clf.fit(X, y)
plot_boundary(clf, X,y)
plt.title('score = ' + str(clf.score(X,y)))
#print(clf.score(X,y))
if clf.score(X,y) == 1.:
break