logistic-regressor-scratch-py

You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

46 KiB

Raw Blame History

None <html lang="en"> <head> </head>

In [2]:

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("iris_basic.csv")
print(df.head())

    sl   sw   pl   pw  target  tNames
0  5.1  3.5  1.4  0.2       0  setosa
1  4.9  3.0  1.4  0.2       0  setosa
2  4.7  3.2  1.3  0.2       0  setosa
3  4.6  3.1  1.5  0.2       0  setosa
4  5.0  3.6  1.4  0.2       0  setosa

In [47]:

x = df["pw"].to_numpy().reshape(-1, 1)      # (150,1)
x

Out[47]:

array([[0.2],
       [0.2],
       [0.2],
       [0.2],
       [0.2],
       [0.4],
       [0.3],
       [0.2],
       [0.2],
       [0.1],
       [0.2],
       [0.2],
       [0.1],
       [0.1],
       [0.2],
       [0.4],
       [0.4],
       [0.3],
       [0.3],
       [0.3],
       [0.2],
       [0.4],
       [0.2],
       [0.5],
       [0.2],
       [0.2],
       [0.4],
       [0.2],
       [0.2],
       [0.2],
       [0.2],
       [0.4],
       [0.1],
       [0.2],
       [0.2],
       [0.2],
       [0.2],
       [0.1],
       [0.2],
       [0.2],
       [0.3],
       [0.3],
       [0.2],
       [0.6],
       [0.4],
       [0.3],
       [0.2],
       [0.2],
       [0.2],
       [0.2],
       [1.4],
       [1.5],
       [1.5],
       [1.3],
       [1.5],
       [1.3],
       [1.6],
       [1. ],
       [1.3],
       [1.4],
       [1. ],
       [1.5],
       [1. ],
       [1.4],
       [1.3],
       [1.4],
       [1.5],
       [1. ],
       [1.5],
       [1.1],
       [1.8],
       [1.3],
       [1.5],
       [1.2],
       [1.3],
       [1.4],
       [1.4],
       [1.7],
       [1.5],
       [1. ],
       [1.1],
       [1. ],
       [1.2],
       [1.6],
       [1.5],
       [1.6],
       [1.5],
       [1.3],
       [1.3],
       [1.3],
       [1.2],
       [1.4],
       [1.2],
       [1. ],
       [1.3],
       [1.2],
       [1.3],
       [1.3],
       [1.1],
       [1.3],
       [2.5],
       [1.9],
       [2.1],
       [1.8],
       [2.2],
       [2.1],
       [1.7],
       [1.8],
       [1.8],
       [2.5],
       [2. ],
       [1.9],
       [2.1],
       [2. ],
       [2.4],
       [2.3],
       [1.8],
       [2.2],
       [2.3],
       [1.5],
       [2.3],
       [2. ],
       [2. ],
       [1.8],
       [2.1],
       [1.8],
       [1.8],
       [1.8],
       [2.1],
       [1.6],
       [1.9],
       [2. ],
       [2.2],
       [1.5],
       [1.4],
       [2.3],
       [2.4],
       [1.8],
       [1.8],
       [2.1],
       [2.4],
       [2.3],
       [1.9],
       [2.3],
       [2.5],
       [2.3],
       [1.9],
       [2. ],
       [2.3],
       [1.8]])

In [48]:

y = df["target"].to_numpy().reshape(-1, 1)  # (150,1)
y = (y == 0).astype(float) 
y

Out[48]:

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [49]:

def sigmoid(z):
    z = np.clip(z, -500, 500)
    sig = 1.0 / (1.0 + np.exp(-z))
    return sig

In [50]:

def log_loss(y, p, eps=1e-12):
    p = np.clip(p, eps, 1 - eps)
    return -np.mean(y*np.log(p) + (1-y)*np.log(1-p))

In [51]:

lr=0.1
epochs=2000 
l2=0.0,
X = np.column_stack([x, np.ones_like(x)])
m = X.shape[0]
theta = np.zeros((2,1))
theta

Out[51]:

array([[0.],
       [0.]])

In [52]:

X.T

Out[52]:

array([[0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1,
        0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2,
        0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2,
        0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5,
        1.5, 1.3, 1.5, 1.3, 1.6, 1. , 1.3, 1.4, 1. , 1.5, 1. , 1.4, 1.3,
        1.4, 1.5, 1. , 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7,
        1.5, 1. , 1.1, 1. , 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2,
        1.4, 1.2, 1. , 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8,
        2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2. , 1.9, 2.1, 2. , 2.4, 2.3, 1.8,
        2.2, 2.3, 1.5, 2.3, 2. , 2. , 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6,
        1.9, 2. , 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9,
        2.3, 2.5, 2.3, 1.9, 2. , 2.3, 1.8],
       [1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ,
        1. , 1. , 1. , 1. , 1. , 1. , 1. ]])

In [53]:

for i in range(epochs):
    z = X @ theta               # (m,1)
    h = sigmoid(z)              # (m,1)
    grad = (X.T @ (h - y)) / m  # (2,1)  <-- from your formula
    theta -= lr * grad

    #if (i % 0 == 0 or t == epochs-1):
     #   print(f"{i:4d}  loss={log_loss(y, h):.6f}  w={theta[0,0]:.6f}  b={theta[1,0]:.6f}")

w, b = theta[0,0], theta[1,0]

In [54]:

def predict_proba(x, w, b):
    x = np.asarray(x, float).reshape(-1)
    return sigmoid(w*x + b)

def predict(x, w, b, thresh=0.5):
    return (predict_proba(x, w, b) >= thresh).astype(int)

In [64]:

rng = np.random.default_rng(0)
m = 120
xNew = np.linspace(-0.5, 2.5, m)
p = predict_proba(xNew, w, b)
print(f"\nLearned: w={w:.3f}, b={b:.3f}, loss={log_loss(p.reshape(-1,1), p.reshape(-1,1)):.4f}")

Learned: w=-5.989, b=4.279, loss=0.1812

In [65]:

yJitter = y +np.random.uniform(-0.2, 0.2, size=y.shape)
plt.plot(x, yJitter, 'ok', alpha=0.1)
plt.plot(xNew,p)

Out[65]:

[<matplotlib.lines.Line2D at 0x112e5cd70>]

No description has been provided for this image

In [ ]:

</html>

46 KiB Raw Blame History

46 KiB

Raw Blame History