commit e93e41c96fa518e78b3c2c23d373763525b6de84 Author: Gerardo Marx Date: Sat Feb 17 20:20:13 2024 -0600 Script done diff --git a/main.ipynb b/main.ipynb new file mode 100644 index 0000000..ef87ae9 --- /dev/null +++ b/main.ipynb @@ -0,0 +1,410 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# OLS Ordinary Least Squares\n", + "\n", + "The OLS general model $\\hat{y}$ is defined by: \n", + "\n", + "$$ \\hat{y} = \\theta_0+\\theta_1 x_1 $$\n", + "\n", + "Taking the partial derivative of SSR with respect to $\\theta_0$ and setting it equal to zero:\n", + "\n", + "$$\\frac{\\partial SSR}{\\partial \\theta_0}=0 $$\n", + "\n", + "where SSR (the sum of squared residuals) is defined as:\n", + "\n", + "$$ SSR = \\sum_{i=1}^n (y^i - \\hat{y}^i)^2 $$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Resulting in:\n", + "\n", + "$$ \\theta_0 = \\frac{\\sum_{i=1}^n y^i}{n} - \\frac{\\theta_1 \\sum_{i=1}^n x^i}{n}$$\n", + "\n", + "or \n", + "\n", + "$$ \\theta_0 = \\bar{y} -\\theta_1 \\bar{x} $$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In a similar way, the partial derivative of SSR with respect to $\\theta_1$ will result in: \n", + "\n", + "$$\\theta_1 = \\frac{\\sum_{i=1}^n x^i(y^i-\\bar{y}) }{\\sum_{i=1}^n x^i(x^i-\\bar{x})}$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Implementing OLS in Python" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 3.9654 , 4.50131579, 5.03723158, 5.57314737, 6.10906316,\n", + " 6.64497895, 7.18089474, 7.71681053, 8.25272632, 8.78864211,\n", + " 9.32455789, 9.86047368, 10.39638947, 10.93230526, 11.46822105,\n", + " 12.00413684, 12.54005263, 13.07596842, 13.61188421, 14.1478 ])" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "x = np.linspace(0,4,20)\n", + "theta0 = 3.9654\n", + "theta1 = 
2.5456\n", + "y = theta0+theta1*x\n", + "y" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt \n", + "plt.plot(x,y, '.k')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = 4*np.random.rand(50, 1)\n", + "y = theta0 + theta1*x+0.5*np.random.randn(50, 1)\n", + "plt.plot(x,y, '*k')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Implementing with `for` \n", + "$$\\theta_1 = \\frac{\\sum_{i=1}^n x^i(y^i-\\bar{y}) }{\\sum_{i=1}^n x^i(x^i-\\bar{x})}$$" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2.4717291]\n" + ] + } + ], + "source": [ + "# for implementation for computing theta1:\n", + "xAve = x.mean()\n", + "yAve = y.mean()\n", + "num = 0\n", + "den = 0\n", + "for i in range(len(x)):\n", + " num = num + x[i]*(y[i]-yAve)\n", + " den = den + x[i]*(x[i]-xAve)\n", + "theta1Hat = num/den\n", + "print(theta1Hat)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[4.18459936]\n" + ] + } + ], + "source": [ + "# for implementation for theta0:\n", + "# $$ \\theta_0 = \\bar{y} -\\theta_1 \\bar{x} $$\n", + "theta0Hat = yAve - theta1Hat*xAve\n", + "print(theta0Hat)\n", + "#real values are\n", + "#theta0 = 3.9654\n", + "#theta1 = 2.5456" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2.27654582])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total = 0\n", + "for i in range(len(x)):\n", + " total = total + x[i]\n", + "total/len(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Implementing OLS by numpy methods" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.4717291029649546\n" + ] + } + ], + 
"source": [ + "# For theta1:\n", + "# $$\\theta_1 = \\frac{\\sum_{i=1}^n x^i(y^i-\\bar{y}) }{\\sum_{i=1}^n x^i(x^i-\\bar{x})}$$\n", + "num2 = np.sum(x*(y-y.mean()))\n", + "den2 = np.sum(x*(x-x.mean()))\n", + "theta1Hat2 = num2/den2\n", + "print(theta1Hat2)\n", + "\n", + "# Efficacy --> time\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.184599360470533" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "theta0Hat2 = yAve-theta1Hat2*xAve\n", + "theta0Hat2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparing Model and Data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "xNew = np.linspace(0,4,20)\n", + "yHat = theta0Hat + theta1Hat*xNew\n", + "plt.plot(xNew, yHat, '-*r', label=\"$\\hat{y}$\")\n", + "plt.plot(x,y,'.k', label=\"data\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Functions for data and OLS" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "def DataGen(xn: float,n: int, disp,theta0=3.9654,theta1=2.5456):\n", + " x = xn*np.random.rand(n, 1)\n", + " #theta0 = 3.9654\n", + " #theta1 = 2.5456\n", + " y = theta0+theta1*x+disp*np.random.randn(n,1)\n", + " return x,y" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "x,y = DataGen(9, 100, 1, 0,1)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(x,y,'.k')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "def MyOLS(x,y):\n", + " # for implementation for computing theta1:\n", + " xAve = x.mean()\n", + " yAve = y.mean()\n", + " num = 0\n", + " den = 0\n", + " for i in range(len(x)):\n", + " num = num + x[i]*(y[i]-yAve)\n", + " den = den + x[i]*(x[i]-xAve)\n", + " theta1Hat = num/den\n", + " theta0Hat = yAve - theta1Hat*xAve\n", + " return theta0Hat, theta1Hat" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.12539439])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "the0, the1 = MyOLS(x,y)\n", + "the1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TODO - Students\n", + "- [ ] Efficacy --> time: For method Vs. Numpy" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}