diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..4daf3ee --- /dev/null +++ b/Readme.md @@ -0,0 +1,233 @@ +# OLS: Ordinary Least Squares + +The OLS general model $\hat{y}$ is defined by: + +$$ \hat{y} = \theta_0+\theta_1 x_1 $$ + +Taking the partial derivative of SSR with respect to $\theta_0$ and setting it equal to zero: + +$$\frac{\partial SSR}{\partial \theta_0}=0 $$ + +where SSR (the sum of squared residuals) is defined as: + +$$ SSR = \sum_{i=1}^n (y^i - \hat{y}^i)^2 $$ + +Resulting in: + +$$ \theta_0 = \frac{\sum_{i=1}^n y^i}{n} - \frac{\theta_1 \sum_{i=1}^n x^i}{n}$$ + +or + +$$ \theta_0 = \bar{y} -\theta_1 \bar{x} $$ + +In a similar way, setting the partial derivative of SSR with respect to $\theta_1$ equal to zero results in: + +$$\theta_1 = \frac{\sum_{i=1}^n x^i(y^i-\bar{y}) }{\sum_{i=1}^n x^i(x^i-\bar{x})}$$ + +# Implementing OLS in Python + + +```python +import numpy as np +x = np.linspace(0,4,20) +theta0 = 3.9654 +theta1 = 2.5456 +y = theta0+theta1*x +y +``` + + + + + array([ 3.9654 , 4.50131579, 5.03723158, 5.57314737, 6.10906316, + 6.64497895, 7.18089474, 7.71681053, 8.25272632, 8.78864211, + 9.32455789, 9.86047368, 10.39638947, 10.93230526, 11.46822105, + 12.00413684, 12.54005263, 13.07596842, 13.61188421, 14.1478 ]) + + + + +```python +import matplotlib.pyplot as plt +plt.plot(x,y, '.k') +plt.show() +``` + + + +![png](main_files/main_5_0.png) + + + + +```python +x = 4*np.random.rand(50, 1) +y = theta0 + theta1*x+0.5*np.random.randn(50, 1) +plt.plot(x,y, '*k') +plt.show() +``` + + + +![png](main_files/main_6_0.png) + + + +## Implementing with `for` +$$\theta_1 = \frac{\sum_{i=1}^n x^i(y^i-\bar{y}) }{\sum_{i=1}^n x^i(x^i-\bar{x})}$$ + + +```python +# for implementation for computing theta1: +xAve = x.mean() +yAve = y.mean() +num = 0 +den = 0 +for i in range(len(x)): + num = num + x[i]*(y[i]-yAve) + den = den + x[i]*(x[i]-xAve) +theta1Hat = num/den +print(theta1Hat) +``` + + [2.4717291] + + + +```python +# for implementation for theta0: +# $$ \theta_0 = \bar{y} -\theta_1 \bar{x} $$ +theta0Hat = 
yAve - theta1Hat*xAve +print(theta0Hat) +#real values are +#theta0 = 3.9654 +#theta1 = 2.5456 +``` + + [4.18459936] + + + +```python +total = 0 +for i in range(len(x)): + total = total + x[i] +total/len(x) +``` + + + + + array([2.27654582]) + + + +## Implementing OLS with NumPy methods + + +```python +# For theta1: +# $$\theta_1 = \frac{\sum_{i=1}^n x^i(y^i-\bar{y}) }{\sum_{i=1}^n x^i(x^i-\bar{x})}$$ +num2 = np.sum(x*(y-y.mean())) +den2 = np.sum(x*(x-x.mean())) +theta1Hat2 = num2/den2 +print(theta1Hat2) + +# Efficiency --> time + +``` + + 2.4717291029649546 + + + +```python +theta0Hat2 = yAve-theta1Hat2*xAve +theta0Hat2 +``` + + + + + 4.184599360470533 + + + +# Comparing Model and Data + + +```python +xNew = np.linspace(0,4,20) +yHat = theta0Hat + theta1Hat*xNew +plt.plot(xNew, yHat, '-*r', label="$\hat{y}$") +plt.plot(x,y,'.k', label="data") +plt.legend() +plt.show() +``` + + + +![png](main_files/main_15_0.png) + + + +# Functions for data generation and OLS + + +```python +def DataGen(xn: float,n: int, disp,theta0=3.9654,theta1=2.5456): + x = xn*np.random.rand(n, 1) + #theta0 = 3.9654 + #theta1 = 2.5456 + y = theta0+theta1*x+disp*np.random.randn(n,1) + return x,y +``` + + +```python +x,y = DataGen(9, 100, 1, 0,1) +``` + + +```python +plt.plot(x,y,'.k') +plt.show() +``` + + + +![png](main_files/main_19_0.png) + + + + +```python +def MyOLS(x,y): + # for implementation for computing theta1: + xAve = x.mean() + yAve = y.mean() + num = 0 + den = 0 + for i in range(len(x)): + num = num + x[i]*(y[i]-yAve) + den = den + x[i]*(x[i]-xAve) + theta1Hat = num/den + theta0Hat = yAve - theta1Hat*xAve + return theta0Hat, theta1Hat +``` + + +```python +the0, the1 = MyOLS(x,y) +the1 +``` + + + + + array([1.12539439]) + + + +# TODO - Students +- [ ] Efficiency --> time: `for` method vs. 
Numpy diff --git a/main_files/main_15_0.png b/main_files/main_15_0.png new file mode 100644 index 0000000..9c2e20d Binary files /dev/null and b/main_files/main_15_0.png differ diff --git a/main_files/main_19_0.png b/main_files/main_19_0.png new file mode 100644 index 0000000..5463bc1 Binary files /dev/null and b/main_files/main_19_0.png differ diff --git a/main_files/main_5_0.png b/main_files/main_5_0.png new file mode 100644 index 0000000..afc0e8d Binary files /dev/null and b/main_files/main_5_0.png differ diff --git a/main_files/main_6_0.png b/main_files/main_6_0.png new file mode 100644 index 0000000..3dd0697 Binary files /dev/null and b/main_files/main_6_0.png differ