You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 KiB
82 KiB
None
<html lang="en">
<head>
</head>
</html>
In [21]:
# Generate synthetic data using a linear model with additive Gaussian noise
import numpy as np
import matplotlib.pyplot as plt
# Parameters of the synthetic data set: y = theta0 + theta1*x + noise.
SEED = 42        # fixed seed so the sample (and everything derived from it) is reproducible
m = 500          # number of sample points
theta0 = 2.5     # intercept of the underlying line
theta1 = 5       # slope of the underlying line
disp = 4         # scale factor applied to the standard-normal noise

# Seed NumPy's global generator right before drawing, so re-running this cell
# (or the whole notebook) always produces the same y.
np.random.seed(SEED)
x = np.linspace(0, 5, m)                      # m evenly spaced points on [0, 5]
y = theta0+theta1*x+disp*np.random.randn(m)   # line plus scaled Gaussian noise
$(x_m,y_m)$
In [22]:
# Scatter plot of the (x, y) sample. Translucent markers keep overlapping
# points distinguishable; labels and title let the figure stand on its own.
fig, ax = plt.subplots()
ax.scatter(x, y, color='black', alpha=0.3, marker='o')
ax.set(xlabel='x', ylabel='y', title='Synthetic linear data with noise')
plt.show()
In [24]:
# Pearson correlation coefficient:
#   r = Sxy / sqrt(Sxx * Syy)
# with
#   Sxx = sum((x_i - mean(x))^2)
#   Syy = sum((y_i - mean(y))^2)
#   Sxy = sum((x_i - mean(x)) * (y_i - mean(y)))
xMean = x.mean()
yMean = y.mean()
# All three sums use the same vectorised form (no element-by-element Python loop).
Sxx = np.sum((x-xMean)**2)
Syy = np.sum((y-yMean)**2)
Sxy = np.sum((x-xMean)*(y-yMean))
r = Sxy/np.sqrt(Sxx*Syy)
# Bare expression: shown as the cell's output.
r
Out[24]:
In [26]:
#GDP vs life satisfaction:
import pandas as pd
# Read the GDP / life-satisfaction table from the CSV file (path is relative to
# the working directory) and show the resulting frame as the cell's output.
csv_path = 'gdp-satisfaction.csv'
data = pd.read_csv(csv_path)
data
Out[26]:
In [39]:
# Pull the three columns used for plotting out of `data` in one step:
# GDP per capita -> x, life satisfaction -> y, country -> labels.
x, y, labels = (
    data[column]
    for column in ("GDP per capita", "Life satisfaction", "Country")
)
# Bare expression: show the country column as the cell's output.
labels
Out[39]:
In [47]:
# Life satisfaction plotted against GDP per capita, one marker per entry of x/y.
fig, ax = plt.subplots()
ax.scatter(x, y, color="black", alpha=0.4, marker="v")

# Label every point with its country. Each annotation gets a single country
# string as text and is anchored at (GDP, satisfaction), i.e. the same
# (x, y) order that scatter() uses above.
for country, gdp, satisfaction in zip(labels, x, y):
    ax.annotate(
        str(country),
        (gdp, satisfaction),
        xytext=(3, 3),                 # small offset so text does not sit on the marker
        textcoords="offset points",
        fontsize=7,
        alpha=0.7,
    )

ax.set(
    xlabel="GDP per capita",
    ylabel="Life satisfaction",
    title="Life satisfaction vs. GDP per capita",
)
plt.show()