You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
196 KiB
196 KiB
None
<html lang="en">
<head>
</head>
</html>
In [17]:
# data for the OLS
import numpy as np
import matplotlib.pyplot as plt
# True parameters of the synthetic model y = theta0 + theta1 * x + noise.
theta0 = 2.6486  # intercept
theta1 = 4.98    # slope
m = 500          # number of sample points
disp = 1         # scale (standard deviation) of the Gaussian noise

# Seeded generator: without it the noise differs on every run, so the
# estimates and figures below could not be reproduced after a kernel restart.
SEED = 42
rng = np.random.default_rng(SEED)

x = np.linspace(0, 5, m)  # m evenly spaced points on [0, 5]
y = theta0 + theta1 * x + disp * rng.standard_normal(m)
In [18]:
# Quick look at the raw sample before fitting. Labels and a title are set so
# the figure can be read on its own, like the other plots in this notebook.
plt.plot(x, y, '.k')
plt.xlabel("x")
plt.ylabel("y")
plt.title("Synthetic data")
plt.show()
In [20]:
# Sample means and the deviations of each variable from its mean.
xMean = x.mean()
yMean = y.mean()
xDev = x - xMean
yDev = y - yMean

# Sums of squares and cross-products about the means.
Sxx = (xDev**2).sum()
Syy = (yDev**2).sum()
Sxy = (xDev * yDev).sum()

# Correlation coefficient; kept as the last expression so the cell displays it.
r = Sxy / np.sqrt(Sxx * Syy)
r
Out[20]:
In [22]:
# Least-squares estimates: slope from the cross-product / sum-of-squares
# ratio, intercept chosen so the line passes through (xMean, yMean).
t1 = Sxy / Sxx
t0 = yMean - t1 * xMean

# Fitted values and what the line leaves unexplained.
yHat = t0 + t1 * x
residuals = y - yHat

# Show only the first few residuals; echoing the whole array floods the
# output without adding information (the residual plot covers the rest).
residuals[:5]
Out[22]:
In [23]:
# Square of the correlation coefficient, displayed as the cell's output.
R2 = pow(r, 2)
R2
Out[23]:
In [25]:
# Report each fitted quantity on its own "label value" line.
summary = (
    ("Correlation r:", r),
    ("Intercept t0:", t0),
    ("Slope t1:", t1),
    ("R2:", R2),
)
for label, value in summary:
    print(label, value)
In [30]:
# Data cloud with the fitted line drawn over it, using an explicit Axes.
fig, ax = plt.subplots()
ax.scatter(x, y, color='black', alpha=0.3)
ax.plot(x, yHat, ':r')  # dotted red line: fitted values
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Simple Linear Regression")
plt.show()
In [38]:
# Residuals against x, with a horizontal reference line at zero.
fig, ax = plt.subplots()
ax.scatter(x, residuals, color="red", alpha=0.3)
ax.axhline(0, color='black')
ax.set_xlabel("x")
ax.set_ylabel("Residuals")
ax.set_title("Residual Plot")
plt.show()
In [1]:
import numpy as np
import matplotlib.pyplot as plt
# Small hand-entered sample: x is 1..8, y rises by roughly two per step.
x = np.arange(1, 9, dtype=float)
y = np.array([2.2, 4.1, 5.9, 8.3, 10.1, 12.0, 13.8, 16.2], dtype=float)

# Means and deviations from the means.
x_mean, y_mean = x.mean(), y.mean()
x_dev = x - x_mean
y_dev = y - y_mean

# Sums of squares and cross-products about the means.
Sxx = (x_dev**2).sum()
Syy = (y_dev**2).sum()
Sxy = (x_dev * y_dev).sum()

# Least-squares line: slope, then the intercept that puts the line through
# the point of means; fitted values and residuals follow from it.
b1 = Sxy / Sxx
b0 = y_mean - b1 * x_mean
y_hat = b0 + b1 * x
residuals = y - y_hat

# Correlation coefficient and its square.
r = Sxy / np.sqrt(Sxx * Syy)
R2 = r**2
# Print each estimate next to its label, in a fixed order.
report = {
    "Correlation r:": r,
    "Intercept b0:": b0,
    "Slope b1:": b1,
    "R²:": R2,
}
for label, value in report.items():
    print(label, value)
# Observations with the fitted line drawn over them, on an explicit Axes.
fit_fig, fit_ax = plt.subplots()
fit_ax.scatter(x, y)
fit_ax.plot(x, y_hat)
fit_ax.set_xlabel("x")
fit_ax.set_ylabel("y")
fit_ax.set_title("Simple Linear Regression")
plt.show()
# Residuals against x, with a horizontal reference line at zero.
resid_fig, resid_ax = plt.subplots()
resid_ax.scatter(x, residuals)
resid_ax.axhline(0)
resid_ax.set_xlabel("x")
resid_ax.set_ylabel("Residuals")
resid_ax.set_title("Residual Plot")
plt.show()