You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 KiB
118 KiB
None
<html lang="en">
<head>
</head>
</html>
Data exploration and visualization¶
In [80]:
!pip3 install scikit-learn
In [81]:
from sklearn import datasets

# Load the bundled Iris dataset; `iris` is reused by every later cell.
iris = datasets.load_iris()
# Show the dataset's own description (features, classes, provenance).
print(iris.DESCR)
In [82]:
import numpy as np
import matplotlib.pyplot as plt

# Sepal length and sepal width as column vectors.
sl = iris.data[:, 0].reshape(-1, 1)
sw = iris.data[:, 1].reshape(-1, 1)

# Raw, unlabelled scatter: black circles.
plt.plot(sl, sw, 'ok')
plt.show()

# Display the feature-vector shape as the cell output.
sl.shape
Out[82]:
In [83]:
tg = iris.target

# One scatter per class; a loop replaces three copy-pasted plt.plot calls.
# (The original also evaluated a bare `tg.shape` mid-cell, which is never
# displayed — removed as dead code.)
class_styles = [('og', "Seto"), ('or', "Versi"), ('ob', "Virgi")]
for cls, (style, name) in enumerate(class_styles):
    plt.plot(sl[tg == cls, 0], sw[tg == cls, 0], style, label=name)
plt.xlabel("sepal length")
plt.ylabel("sepal width")
plt.legend()
plt.show()
Binary classifier with one parameter¶
In [ ]:
# Sum of two horizontally shifted logistic curves — presumably a demo of
# how sigmoids combine into a double "step"; TODO confirm intent.
z = np.linspace(-10, 10, 100)
left_step = 1 / (1 + np.exp(-z - 4))
right_step = 1 / (1 + np.exp(-z + 4))
sig = left_step + right_step
plt.plot(z, sig, 'ob')
plt.show()
First classifier¶
$$z = \theta_1\times x_1 + \theta_0$$
In [149]:
# Single feature: petal width, as a column vector.
pw = iris.data[:, 3].reshape(-1, 1)
# Design matrix with a leading bias column of ones.
X = np.hstack([np.ones_like(pw), pw])
# Binary target: 1 for Setosa, 0 otherwise.
y = (iris.target == 0).astype(int).reshape(-1, 1)
In [150]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)).

    Parameters
    ----------
    z : scalar or array_like
        Raw linear score(s).

    Returns
    -------
    Value(s) in (0, 1), same shape as ``z``.
    """
    # Clip to avoid overflow warnings in np.exp for very large |z|.
    # Beyond +/-50 the result already rounds to exactly 0.0 or 1.0 in
    # float64, so clipping does not change any returned value.
    z = np.clip(z, -50, 50)
    return 1 / (1 + np.exp(-z))
In [151]:
def logLoss(y, yModel):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y : array_like
        True labels in {0, 1}.
    yModel : array_like
        Predicted probabilities in [0, 1], same shape as ``y``.

    Returns
    -------
    float
        -mean(y*log(p) + (1-y)*log(1-p)).
    """
    # Clip predictions away from exact 0 and 1 so np.log never produces
    # -inf / nan when the model is fully confident.
    yModel = np.clip(yModel, 1e-12, 1 - 1e-12)
    return -np.mean(y * np.log(yModel) + (1 - y) * np.log(1 - yModel))
In [172]:
# Gradient-descent hyperparameters and parameter initialisation.
lr = 0.1            # learning rate
epochs = 5000       # number of full-batch updates
m = X.shape[0]      # number of training samples

np.random.seed(10)              # fixed seed for a reproducible start
theta = np.random.rand(2, 1)    # [intercept, slope], uniform in [0, 1)
theta
Out[172]:
In [173]:
# Full-batch gradient descent on the log loss.
# (The original cell also built xNew/Xnew here; they were never used before
# being rebuilt by the plotting cell below, so that dead code is removed.)
losses = []
for i in range(epochs):
    # Forward pass: probabilities under the current parameters.
    z = X @ theta
    h = sigmoid(z)
    # Gradient of the mean log loss w.r.t. theta.
    grad = (X.T @ (h - y)) / m
    theta = theta - lr * grad
    # Loss of the parameters that produced `h` this epoch.
    lossValue = logLoss(y, h)
    losses.append(lossValue)
    if i % 100 == 0:
        print(f"Epoch {i:4d}, Loss: {lossValue:.6f}")
theta
Out[173]:
In [174]:
# Training curve: the loss should decrease monotonically for full-batch GD.
plt.plot(losses)
plt.xlabel("Epoch")
plt.ylabel("Log loss")
plt.title("Training loss")
plt.show()
Out[174]:
In [175]:
# Dense grid over the petal-width range for a smooth fitted curve.
xNew = np.linspace(-0.5, 3, m)
Xnew = np.c_[np.ones_like(xNew), xNew]
yMod = sigmoid(Xnew @ theta)

# Vertical jitter so overlapping 0/1 labels remain visible when scattered.
yJitter = y + np.random.uniform(-0.1, 0.1, size=y.shape)

# Final training loss of the fitted model.
logloss = logLoss(y, sigmoid(X @ theta))
print(logloss)
In [169]:
# Jittered labels (green) with the fitted sigmoid curve (dotted red).
# NOTE(review): this cell's execution count (In [169]) is out of order
# relative to the cells around it — re-run top to bottom before sharing.
plt.plot(pw, yJitter, 'og', alpha=0.3)
plt.plot(xNew, yMod, ':r')
plt.xlabel("petal width")
plt.ylabel("P(Setosa)")
plt.show()
In [176]:
# Training-set accuracy at the conventional 0.5 decision threshold.
p_train = sigmoid(X @ theta)
y_hat = (p_train >= 0.5).astype(int)   # threshold is tunable if needed
acc = (y_hat == y).mean()
print(f"Train accuracy: {acc:.3f}")
In [ ]: