basic binary classifier
commit c4e9b59bed
```python
#!pip3 install scikit-learn
from sklearn import datasets

iris = datasets.load_iris()
print(iris.DESCR)
```

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
        - Iris-Setosa
        - Iris-Versicolour
        - Iris-Virginica

:Summary Statistics:

============== ==== ==== ======= ===== ====================
                Min  Max   Mean    SD   Class Correlation
============== ==== ==== ======= ===== ====================
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)
============== ==== ==== ======= ===== ====================

:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.

This is perhaps the best known database to be found in the
pattern recognition literature. Fisher's paper is a classic in the field and
is referenced frequently to this day. (See Duda & Hart, for example.) The
data set contains 3 classes of 50 instances each, where each class refers to a
type of iris plant. One class is linearly separable from the other 2; the
latter are NOT linearly separable from each other.

.. dropdown:: References

  - Fisher, R.A. "The use of multiple measurements in taxonomic problems"
    Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
    Mathematical Statistics" (John Wiley, NY, 1950).
  - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.
    (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
  - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
    Structure and Classification Rule for Recognition in Partially Exposed
    Environments". IEEE Transactions on Pattern Analysis and Machine
    Intelligence, Vol. PAMI-2, No. 1, 67-71.
  - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions
    on Information Theory, May 1972, 431-433.
  - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al.'s AUTOCLASS II
    conceptual clustering system finds 3 classes in the data.
  - Many, many more ...

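The description says one class (Setosa) is linearly separable from the other two. A minimal sketch of that check on the petal-width feature, assuming `iris` is the object loaded above:

```python
import numpy as np

petal_width = iris.data[:, 3]
is_setosa = iris.target == 0

# Setosa petal widths top out below the minimum of the other two classes,
# so a single threshold on this feature already separates it perfectly.
print(petal_width[is_setosa].max(), petal_width[~is_setosa].min())  # 0.6 vs 1.0
```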
```python
import numpy as np
import matplotlib.pyplot as plt

x = iris.data[:, 3:4]                                # petal width in cm, shape (150, 1)
y = (iris.target == 0).astype(int).reshape(-1, 1)    # 1 for Setosa, 0 otherwise
y
```

array([[1],
       [1],
       [1],
       ...,
       [0],
       [0],
       [0]])

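Only the first 50 entries (the Setosa samples) are 1; the remaining 100 are 0. A quick way to confirm the split without scanning the whole array (a sketch, assuming the cell above has run):

```python
# Count the 0s and 1s in y; expects [100, 50] for the Setosa-vs-rest split.
print(np.bincount(y.ravel()))
```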
```python
def sigmoid(z):
    # Numerically stable logistic function: clip z so np.exp never overflows.
    z = np.clip(z, -500, 500)
    sig = 1.0 / (1.0 + np.exp(-z))
    return sig
```
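For reference, the function implemented above is the standard logistic curve; the clip only guards the exponential against overflow for extreme scores:

$$\sigma(z) = \frac{1}{1 + e^{-z}}$$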
```python
def log_loss(y, p, eps=1e-12):
    # Average binary cross-entropy; clip p away from 0 and 1 so the logs stay finite.
    p = np.clip(p, eps, 1 - eps)
    return -np.mean(y*np.log(p) + (1-y)*np.log(1-p))
```
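This is the average binary cross-entropy that the gradient-descent loop further down minimizes:

$$
\mathcal{L} = -\frac{1}{m}\sum_{i=1}^{m}\Big[y_i\log p_i + (1-y_i)\log(1-p_i)\Big],
\qquad p_i = \sigma(w\,x_i + b)
$$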
```python
lr = 0.1          # learning rate
epochs = 2000     # gradient-descent iterations
l2 = 0.0          # L2 penalty (kept at zero; not used in the loop below)

# Append a column of ones so the intercept b is learned as the second entry of theta.
X = np.column_stack([x, np.ones_like(x)])
m = X.shape[0]
theta = np.zeros((2, 1))
```
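With the ones column appended, the scores for all samples become a single matrix product, which is exactly what the training loop below computes:

$$
z = X\theta,\qquad
X = \begin{bmatrix} x_1 & 1\\ \vdots & \vdots\\ x_m & 1 \end{bmatrix},\qquad
\theta = \begin{bmatrix} w\\ b \end{bmatrix}
$$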
```python
X
```

array([[0.2, 1. ],
       [0.2, 1. ],
       [0.2, 1. ],
       ...,
       [2. , 1. ],
       [2.3, 1. ],
       [1.8, 1. ]])

```python
for i in range(epochs):
    z = X @ theta                  # (m,1) linear scores
    h = sigmoid(z)                 # (m,1) predicted probabilities
    grad = (X.T @ (h - y)) / m     # (2,1) gradient of the log-loss
    theta -= lr * grad

    #if i % 100 == 0 or i == epochs-1:
    #    print(f"{i:4d} loss={log_loss(y, h):.6f} w={theta[0,0]:.6f} b={theta[1,0]:.6f}")

w, b = theta[0,0], theta[1,0]
```
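The gradient step above follows from differentiating the cross-entropy with respect to $\theta$ (with the bias folded into $X$ via the ones column):

$$
\nabla_\theta \mathcal{L} = \frac{1}{m}\,X^\top\big(\sigma(X\theta) - y\big),
\qquad
\theta \leftarrow \theta - \mathrm{lr}\cdot\nabla_\theta \mathcal{L}
$$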
```python
w
```

np.float64(-5.989972912185251)

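As a rough cross-check that is not part of the original notebook, scikit-learn's own logistic regression can be fit on the same feature. Because the classes are separable on petal width and sklearn applies L2 regularization by default, the coefficients will not match `w` and `b` exactly, but the sign and the implied decision boundary should be comparable:

```python
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
clf.fit(x, y.ravel())
print(clf.coef_, clf.intercept_)
print(-clf.intercept_ / clf.coef_.ravel())   # implied petal-width decision boundary in cm
```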
```python
def predict_proba(x, w, b):
    # Probability of class 1 (Setosa) for one or more petal widths.
    x = np.asarray(x, float).reshape(-1)
    return sigmoid(w*x + b)

def predict(x, w, b, thresh=0.5):
    # Hard 0/1 prediction by thresholding the probability.
    return (predict_proba(x, w, b) >= thresh).astype(int)
```
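A quick spot check with the learned parameters (petal widths in cm; with w ≈ -5.99 and b ≈ 4.28 the first point should come out as Setosa and the other two should not):

```python
print(predict_proba([0.2, 1.0, 2.3], w, b))   # roughly [0.96, 0.15, 0.00]
print(predict([0.2, 1.0, 2.3], w, b))         # expected: [1 0 0]
```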
```python
rng = np.random.default_rng(0)    # seeded generator, used for the jitter in the plot below
m = 120                           # number of grid points (shadows the earlier sample count)
xNew = np.linspace(-1, 3, m)
p = predict_proba(xNew, w, b)
# Note: this prints the mean entropy of the grid predictions (log_loss of p against itself),
# not the training loss; the training loss would be log_loss(y, predict_proba(x, w, b).reshape(-1,1)).
print(f"\nLearned: w={w:.3f}, b={b:.3f}, loss={log_loss(p.reshape(-1,1), p.reshape(-1,1)):.4f}")
```

Learned: w=-5.990, b=4.280, loss=0.1361

```python
yJitter = y + rng.uniform(-0.1, 0.1, size=y.shape)   # vertical jitter so overlapping points stay visible
plt.plot(x, yJitter, 'ok', alpha=0.1)                 # training points: petal width vs. Setosa label
plt.plot(xNew, p)                                     # fitted logistic curve
```

[<matplotlib.lines.Line2D at 0x1154ea850>]

