You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

187 KiB

None <html lang="en"> <head> </head>
In [1]:
# General used libriries
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom, poisson, norm, expon 

Bernoulli distribution

In [2]:
# Bernoulli distribution:
p = 0.7
print("P(X=1) = ", p)
print("P(X=0) = ", 1-p)
print("Mean = ", p)
print("Variance = ", p*(1-p))
P(X=1) =  0.7
P(X=0) =  0.30000000000000004
Mean =  0.7
Variance =  0.21000000000000002
In [3]:
# Bernoulli observations 
# repeating a success|failures experiment many times
p = 0.7
n = 1000 #samples
np.random.seed(33)
berData = np.random.binomial(1, p, size=n)
print("First 20 observations:")
print(berData[:20])

print("Sample mean: ", berData.mean())
print("Sample variance: ", berData.var(ddof=1))
First 20 observations:
[1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 0 0 1 1 1]
Sample mean:  0.675
Sample variance:  0.21959459459459454
In [4]:
values, counts = np.unique(berData, return_counts=True)
plt.bar(values, counts/n, width=0.4, color="grey")
plt.xticks([0, 1])
plt.xlabel("Outcomes")
plt.ylabel("Relative frequency")
plt.title("Simulated Bernoulli Distribution")
plt.show()
No description has been provided for this image

Binomial distribution

The binomial distribution counts the number of successes in $n$ independent Bernoulli trials.

In [5]:
p = 0.08
n = 20
k = 2

prob = binom.pmf(k, n, p)
print(f"P(X=2) = {prob:.4f}")
print("Mean= ", n*p)
print("Variance= ", n*p*(1-p))
P(X=2) = 0.2711
Mean=  1.6
Variance=  1.4720000000000002
In [6]:
x = np.arange(0, n+1)
pmf = binom.pmf(x, n, p)
plt.bar(x, pmf, color="grey", width=0.6)
plt.xlabel("Number of succeses")
plt.ylabel("Probability")
plt.title("Binomial distribution: n=20, p=0.08")
plt.show()
No description has been provided for this image
In [7]:
# Binomial simuation
n = 20 #sample
p = 0.08
nPop = 10_000
np.random.seed(33)
binomData = np.random.binomial(n,p,size=nPop)

print("Simulated mean: ", binomData.mean())
print("Theoretical mean: ", n*p)
print("--------------")
print("Simulated var: ", binomData.var(ddof=1))
print("Theoretical var: ", n*p*(1-p))
Simulated mean:  1.6011
Theoretical mean:  1.6
--------------
Simulated var:  1.4717259625962598
Theoretical var:  1.4720000000000002
In [8]:
plt.hist(binomData, bins=np.arange(-0.5,n+1.5,1), density=True, color="grey")
plt.plot(x, pmf, 'ok')
plt.show()
No description has been provided for this image
In [9]:
print(pmf)
20*0.08*(1-0.08)**(19)
[1.88693329e-01 3.28162312e-01 2.71090605e-01 1.41438577e-01
 5.22707783e-02 1.45449122e-02 3.16193744e-03 5.49902164e-04
 7.77035666e-05 9.00910917e-06 8.61740877e-07 6.81218085e-08
 4.44272664e-09 2.37737880e-10 1.03364296e-11 3.59527985e-13
 9.76978221e-15 1.99893242e-16 2.89700351e-18 2.65171946e-20
 1.15292150e-22]
Out[9]:
0.32816231158747255

Poisson distribution

  • $P(X=k) = e^{-\lambda} \lambda^k/k!$
  • $E[X] = \lambda$
  • $Var(X) = \lambda$

Example: Let $X$ be the number of flaws per metre per cable, with average rate $\lambda=3$.

In [14]:
lam = 3
print("P(X=0) = ", poisson.pmf(0, lam))
print("P(X=2) = ", poisson.pmf(2, lam))
print("Mean = ", lam)
print("Var = ", lam)
P(X=0) =  0.049787068367863944
P(X=2) =  0.22404180765538775
Mean =  3
Var =  3
In [22]:
x = np.arange(0, 15)
pmfPoisson = poisson.pmf(x, lam)

plt.bar(x, pmfPoisson, width=0.7)
plt.xlabel("Count")
plt.ylabel("Probability")
plt.title(r"Poisson distribution: $\lambda=3$")
plt.show()
No description has been provided for this image
In [33]:
# Poisson distribution
lam = 3
samples = 10_000
poiData = np.random.poisson(lam, size=samples)

print("Simulated mean: ", poiData.mean())
print("Theoretical mean: ", lam)

print("Simulated Var: ", poiData.var(ddof=1))
print("Theoretical Var: ", lam)
Simulated mean:  2.9953
Theoretical mean:  3
Simulated Var:  2.9753754475447542
Theoretical Var:  3
In [37]:
plt.hist(poiData, bins=np.arange(-0.5, 15.5,1), density=True)
plt.plot(x, poisson.pmf(x, lam),'or')
plt.xlabel("Counts")
plt.ylabel("Relative Freq/Probability")
plt.title("Poisson: Simulated vs Theory")
plt.show()
No description has been provided for this image

Normal distribution

If $X\sim N(\mu, \sigma^2)$, then:

  • $\mu$ is the mean,
  • $\sigma$ is the standard deviation

Example: Suppose a sensor output is a normally distributed with mean 50 and standard deviation 4. We compute the probability that the output is less than 58.

In [45]:
mu = 50 
sigma = 4
proba = norm.cdf(58, loc=mu, scale=sigma)
proba
Out[45]:
np.float64(0.9772498680518208)
In [47]:
x = np.linspace(mu - 4*sigma, mu + 4*sigma, 400)
pdf = norm.pdf(x, loc=mu, scale=sigma)

plt.figure(figsize=(8,4))
plt.plot(x, pdf)
plt.axvline(58, linestyle='--')
plt.xlabel("x")
plt.ylabel("Density")
plt.title("Normal Distribution")
plt.show()
No description has been provided for this image
In [51]:
mu = 50
sigma = 4 
samples = 10_000

normData = np.random.normal(loc=mu, scale=sigma, size=samples)
print("Simulated mean: ", normData.mean())
print("Theoretical mean: ", mu)
print("Simulated std: ", normData.std(ddof=1))
print("Theoretical std: ", sigma)
Simulated mean:  50.004935022325526
Theoretical mean:  50
Simulated std:  3.9780626491745017
Theoretical std:  4
In [58]:
plt.hist(normData, bins=50, density=True)
plt.plot(x, pdf, 'or')
plt.xlabel("Value")
plt.ylabel("Density")
plt.title("Normal: Simulation vs Theory")
plt.show()
No description has been provided for this image

Central Limit Theorem

The central limit theorem (CLT) states that the sampling distribution of the sample mean becomes approximately normal as the sample size increases.

In [61]:
population = np.random.exponential(scale=2, size=100_000)
plt.hist(population, bins=50, density=True)
plt.xlabel("Value")
plt.ylabel("Density")
plt.show()
No description has been provided for this image
In [ ]:
 
In [62]:
def SampleMeans(population, sampleSize, nRep=5_000):
    means = []
    for _ in range(nRep):
        sample = np.random.choice(population, size=sampleSize, replace=True)
        means.append(sample.mean())
        pass
    return np.array(means)
In [63]:
mean5 = SampleMeans(population, 5)
mean30 = SampleMeans(population, 30)
mean100 = SampleMeans(population, 100)
In [68]:
plt.hist(mean5, bins=40, density=True, color="orange",  alpha=0.3)
plt.hist(mean30, bins=40, density=True, color="green",  alpha=0.3)
plt.hist(mean100, bins=40, density=True, color="blue",  alpha=0.3)
plt.show()
No description has been provided for this image
In [72]:
# Numerical Check
print("Population mean: ", population.mean())
print("Population var: ", population.var())

print("Mean of 5 samples: ", mean5.mean())
print("Var of 5 samples: ", mean5.var(ddof=1))
print("Mean of 30 samples: ", mean30.mean())
print("Var of 5 samples: ", mean30.var(ddof=1))
print("Mean of 100 samples: ", mean100.mean())
Population mean:  1.9932073011758906
Population var:  4.025192782387372
Mean of 5 samples:  1.9998045611349438
Var of 5 samples:  0.7837744844351563
Mean of 30 samples:  1.992377755782367
Var of 5 samples:  0.13206867208626297
Mean of 100 samples:  1.994853961830046
</html>