You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

122 KiB

None <html lang="en"> <head> </head>

Sampling variability

We create a population (with a known distribution), then repeatedly take random samples and compute the mean of each sample. Then we compare how the variability of the sample mean changes for different sample sizes (n).

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Simulated population: 200,000 draws from a log-normal distribution,
# generated from a seeded generator so the notebook is reproducible.
rng = np.random.default_rng(seed=7)
population = rng.lognormal(mean=1.0, sigma=0.6, size=200_000)

# Population parameters; ddof=0 gives the population (not sample) standard deviation.
popMu, popSD = np.mean(population), np.std(population, ddof=0)
popMu, popSD
Out[15]:
(np.float64(3.2549582990800525), np.float64(2.13814149865377))
In [18]:
def SamplingMeans(popu, n, R, rng=None):
    """Simulate the sampling distribution of the mean.

    Draws ``R`` random samples of size ``n`` from ``popu`` (without
    replacement within each sample) and records the mean of each sample.

    Parameters
    ----------
    popu : array-like
        Population values to sample from.
    n : int
        Size of each sample.
    R : int
        Number of repeated samples to draw.
    rng : numpy.random.Generator, optional
        Generator used for the draws. When omitted, a new generator seeded
        with 7 is created on every call, so each call replays the same random
        stream (the original behaviour). Pass a shared generator to make
        successive calls draw from one continuing stream instead.

    Returns
    -------
    numpy.ndarray
        Array of length ``R`` containing the sample means.

    Raises
    ------
    ValueError
        Raised by ``Generator.choice`` when ``n`` exceeds the population size,
        since sampling is done without replacement.
    """
    if rng is None:
        # Fixed default seed keeps results reproducible for existing callers.
        rng = np.random.default_rng(7)
    means = np.empty(R)
    for i in range(R):
        means[i] = rng.choice(popu, size=n, replace=False).mean()
    return means
In [37]:
# 3000 replicate sample means for each sample size.
# NOTE: the variable names are historical; the sample sizes actually used
# are n=30 (meansn10), n=50 (meansn20) and n=100 (meansn30).
meansn10, meansn20, meansn30 = [
    SamplingMeans(population, n=sample_size, R=3000)
    for sample_size in (30, 50, 100)
]
In [38]:
# Empirical standard error (ddof=1) of the sample mean for each set of replicates.
tuple(np.std(replicate_means, ddof=1) for replicate_means in (meansn10, meansn20, meansn30))
Out[38]:
(np.float64(0.3878374423877889),
 np.float64(0.3033596828581503),
 np.float64(0.21441262896303093))
In [39]:
bins = 10
plt.figure()
plt.hist(meansn10, bins=bins)
# Vertical reference line at the true population mean.
plt.axvline(popMu, color="red", alpha=0.5)
# meansn10 was generated with n=30 (see the SamplingMeans call), so the
# title reports 30 rather than the 10 suggested by the variable name.
plt.title("Sampling distribution of mean (n=30)")
plt.xlabel("Sample mean")
plt.ylabel("count")
plt.show()
No description has been provided for this image
In [41]:
bins = 10

# One histogram per sample size. Each pair is (replicate means, sample size
# actually used to generate them): despite their names, meansn10, meansn20
# and meansn30 were drawn with n=30, 50 and 100 respectively.
for sample_means, sample_size in ((meansn10, 30), (meansn20, 50), (meansn30, 100)):
    fig, ax = plt.subplots()
    ax.hist(sample_means, bins=bins)
    # Vertical reference line at the true population mean.
    ax.axvline(popMu, color="red", alpha=0.9)
    ax.set(
        title=f"Sampling distribution of mean (n={sample_size})",
        xlabel="Sample mean",
        ylabel="count",
    )
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [29]:
bins = 20
# Histogram of the raw population values, for comparison with the
# sampling distributions of the mean.
plt.figure()
plt.hist(population, bins=bins)
plt.title("Population distribution")
plt.xlabel("Value")
plt.ylabel("count")
plt.show()
No description has been provided for this image
In [44]:
bins = 10
plt.figure()
# Overlay the three sampling distributions; semi-transparent bars keep the
# overlapping histograms visible. Labels give the sample size actually used
# (meansn10 -> n=30, meansn20 -> n=50, meansn30 -> n=100).
plt.hist(meansn10, bins=bins, alpha=0.5, label="n=30")
plt.hist(meansn20, bins=bins, alpha=0.5, label="n=50")
plt.hist(meansn30, bins=bins, alpha=0.5, label="n=100")
# Vertical reference line at the true population mean.
plt.axvline(popMu, color="red", alpha=0.9, label="population mean")
plt.title("Sampling distribution of mean by sample size")
plt.xlabel("Sample mean")
plt.ylabel("count")
plt.legend()
plt.show()
No description has been provided for this image
</html>