You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
158 KiB
158 KiB
None
<html lang="en">
<head>
</head>
</html>
In [2]:
# Core numerical / plotting stack used throughout the notebook.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the simplified iris dataset from the working directory.
# Assumes columns "pw", "sl", "sw" and an integer "target" — confirm against the CSV.
df = pd.read_csv("iris_basic.csv")
print(df.head())
In [3]:
# Single feature — petal width — as a column vector for matrix arithmetic.
x = df["pw"].values.reshape(-1, 1)  # shape (150, 1)
x
Out[3]:
In [4]:
# One-vs-rest labels: 1.0 for class 0 (setosa per the later plot legend), else 0.0.
labels = df["target"].to_numpy().reshape(-1, 1)  # shape (150, 1)
y = (labels == 0).astype(float)
y
Out[4]:
In [5]:
def sigmoid(z):
    """Numerically stable logistic function 1 / (1 + e^{-z}).

    z is clipped to [-500, 500] before exponentiation so np.exp never
    overflows; works element-wise on scalars and arrays alike.
    """
    safe_z = np.clip(z, -500, 500)
    return 1.0 / (1.0 + np.exp(-safe_z))
In [6]:
def log_loss(y, p, eps=1e-12):
    """Mean binary cross-entropy between labels y and predicted probabilities p.

    Probabilities are clipped to [eps, 1 - eps] so log(0) can never occur.
    """
    p_safe = np.clip(p, eps, 1 - eps)
    per_sample = y * np.log(p_safe) + (1 - y) * np.log(1 - p_safe)
    return -np.mean(per_sample)
In [7]:
lr = 0.1       # learning rate for gradient descent
epochs = 2000  # number of full-batch update steps
# BUG FIX: the original read `l2=0.0,` — the trailing comma made l2 the
# tuple (0.0,) rather than the float 0.0.
l2 = 0.0       # L2 regularization strength (currently unused downstream)
# Design matrix: the feature column plus a bias column of ones -> (150, 2).
X = np.column_stack([x, np.ones_like(x)])
m = X.shape[0]            # number of training samples
theta = np.zeros((2, 1))  # parameters [w, b], zero-initialized
theta
Out[7]:
In [8]:
X.T  # shape (2, 150) — displayed as a sanity check of the design matrix
Out[8]:
In [9]:
# Full-batch gradient descent on the logistic-regression log-loss.
for i in range(epochs):
    z = X @ theta               # linear scores, shape (m, 1)
    h = sigmoid(z)              # predicted probabilities, shape (m, 1)
    grad = (X.T @ (h - y)) / m  # gradient of mean log-loss w.r.t. theta, shape (2, 1)
    theta -= lr * grad
    # Optional progress logging. NOTE(review): the original commented line used
    # `i % 0` (ZeroDivisionError if enabled) and an undefined `t`; corrected form:
    # if i % 100 == 0 or i == epochs - 1:
    #     print(f"{i:4d} loss={log_loss(y, h):.6f} w={theta[0,0]:.6f} b={theta[1,0]:.6f}")
w, b = theta[0,0], theta[1,0]
In [10]:
def predict_proba(x, w, b):
    """Class-1 probability sigmoid(w*x + b) for scalar or array input x."""
    flat = np.asarray(x, float).reshape(-1)
    return sigmoid(w * flat + b)


def predict(x, w, b, thresh=0.5):
    """Hard 0/1 labels: 1 wherever the predicted probability >= thresh."""
    probs = predict_proba(x, w, b)
    return (probs >= thresh).astype(int)
In [11]:
rng = np.random.default_rng(0)  # seeded generator for reproducible jitter below
m = 120                         # grid size (NOTE: shadows the training-sample count m)
# Evaluation grid over petal width, extending slightly past the observed range.
xNew = np.linspace(-0.5, 2.5, m)
p = predict_proba(xNew, w, b)
# BUG FIX: the loss was computed as log_loss(p, p) — comparing the predictions
# with themselves, which is meaningless. Report the training loss against the
# true labels y instead.
train_loss = log_loss(y, predict_proba(x, w, b).reshape(-1, 1))
print(f"\nLearned: w={w:.3f}, b={b:.3f}, loss={train_loss:.4f}")
In [12]:
# Scatter the 0/1 labels with vertical jitter so overlapping points are visible,
# with the fitted sigmoid curve on top.
# FIX: use the seeded generator `rng` (created in the previous cell) instead of
# the unseeded global np.random, so the figure is reproducible across runs.
yJitter = y + rng.uniform(-0.2, 0.2, size=y.shape)
plt.plot(x, yJitter, 'ok', alpha=0.1)
plt.plot(xNew, p)
Out[12]:
Multi-Parametric Binary Classifier
In [13]:
# Two raw features: sepal length (x1) and sepal width (x2).
x1 = df["sl"].to_numpy()
x2 = df["sw"].to_numpy()
# Design matrix with a leading bias column: [1, x1, x2] -> shape (150, 3).
X = np.column_stack([ np.ones_like(x1), x1, x2])
In [14]:
# Raw integer class labels — presumably 0/1/2 = setosa/versicolor/virginica,
# matching the plot legend below; the binarization happens in a later cell.
y = df["target"].to_numpy()
#y = (y == 2).astype(float)
In [15]:
# Scatter the three iris classes in the sepal-length / sepal-width plane.
for cls, fmt, lbl in ((0, '.g', 'Set'), (1, '.r', 'Ver'), (2, '.b', 'Vir')):
    plt.plot(x1[y == cls], x2[y == cls], fmt, label=lbl)
plt.legend()
plt.show()
In [16]:
# Re-binarize: 1.0 for virginica (class 2), 0.0 for everything else.
y = (df["target"].to_numpy() == 2).astype(float)
In [17]:
def predict_proba(X, theta):
    """Class-1 probability for every row of the design matrix X.

    NOTE: this redefinition shadows the earlier single-feature
    predict_proba(x, w, b) from the first half of the notebook.
    """
    return sigmoid(X @ theta)
In [18]:
lr = 0.01      # learning rate
epochs = 5000  # gradient-descent steps
m = X.shape[0]  # number of training samples
# FIX: initialize from the seeded generator `rng` (created earlier with seed 0)
# rather than the unseeded global np.random.randn, so the run is reproducible.
theta = rng.standard_normal(3)  # parameters [bias, w1, w2]
theta
Out[18]:
In [19]:
# Full-batch gradient descent for the 3-parameter logistic model.
for i in range(epochs):
    z = X @ theta               # linear scores, shape (m,)
    # FIX: use the numerically stable sigmoid() defined above (clips z) instead
    # of the raw 1/(1+np.exp(-z)), which overflows for large negative scores.
    h = sigmoid(z)              # predicted probabilities, shape (m,)
    # Gradient of the mean log-loss; shape (3,) — the old "(2,1)" comment was wrong.
    grad = (X.T @ (h - y)) / m
    theta -= lr * grad
    if i % 100 == 0:
        print(f"{i:4d} loss={log_loss(y, h):.6f}")
theta
Out[19]:
In [20]:
# Probability surface over a grid of (sepal length, sepal width) values.
# FIX: np.meshgrid takes 1-D coordinate vectors — the .reshape(-1, 1) calls
# were unnecessary; and reuse the stable sigmoid() instead of raw exp.
x1New, x2New = np.meshgrid(
    np.linspace(3, 8, 100),
    np.linspace(0, 6, 100))
# Grid points as design-matrix rows: [1, x1, x2].
XNew = np.column_stack([np.ones(x1New.size), x1New.ravel(), x2New.ravel()])
z = XNew @ theta
yPred = sigmoid(z)
zz = yPred.reshape(x1New.shape)  # back to (100, 100) for contour plotting
zz
Out[20]:
In [21]:
# Decision surface: probability contours of P(virginica) over the raw scatter.
plt.figure(figsize=(8,6))
for mask, fmt, lbl in ((y == 0, 'or', 'No Virg'), (y == 1, 'g^', 'Virginica')):
    plt.plot(x1[mask], x2[mask], fmt, label=lbl)
contour = plt.contour(x1New, x2New, zz, linewidths=1)
plt.clabel(contour, inline=1, fontsize=15)
plt.xlabel("Sepal Length")
plt.ylabel("Sepal Width")
plt.legend()
plt.show()
In [22]:
# TODO: extend to a multinomial (softmax) classifier so all three iris
# classes are modeled jointly instead of one-vs-rest.