from sympy import *
init_printing()
from sympy.utilities.iterables import partitions, multiset, multiset_permutations
import math
n = Symbol('n') # sample size
mu = IndexedBase("mu") # central moments
Suppose $X_i$ are i.i.d. with mean $\mu$ and central moments $\mu_k = E(X-\mu)^k$.
The following function computes $E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^p \left(\sum_{i=1}^n (X_i-\mu)\right)^q \right]$ in terms of the $\mu_k$; it will be used later to compute the moments of the sample variance.
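To see what must be enumerated, write $Y_i = X_i - \mu$, expand the product, and take expectations term by term: after grouping the factors by the distinct indices they involve, any group that receives only a single first power contributes $E(Y_i) = 0$ and kills its term. For instance, the case $p = 0$, $q = 2$ reduces to
$$ E\left[\left(\sum_{i=1}^n Y_i\right)^2\right] = \sum_{i=1}^n E(Y_i^2) + \sum_{i \neq j} E(Y_i)E(Y_j) = n \mu_2. $$
The algorithm below enumerates the surviving groupings through integer partitions of $p + q$.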
# The following algorithm is modified from Algorithm 1 of
# Vegas-Sánchez-Ferrero, Gonzalo, et al. "A direct calculation of moments of the sample variance."
# Mathematics and Computers in Simulation 82.5 (2012): 790-804,
# in order to express the result in terms of central moments.
def ESX2pSXq(n, p, q, mu = IndexedBase("mu")):
    total = 0
    M = p + q
    for s in range(1, M + 1):  # s = number of distinct indices involved
        nCs = binomial(n, s)
        parts = partitions(M, s, size = True)
        for size, part in parts:
            if size != s:  # keep only partitions of M into exactly s parts
                continue
            part = [k for k, v in part.items() for j in range(v)]
            # number of distinct orderings of the parts (multinomial coefficient)
            sigma = int(math.factorial(len(part)) / prod([math.factorial(x) for x in multiset(part).values()]))
            perms = multiset_permutations([i + 1 for i, v in enumerate(part) for j in range(v)])
            for perm in perms:
                # c[j-1] = total exponent collected by the j-th group of indices
                c = [0]*M
                for j in range(1, M + 1):
                    for i, x in enumerate(perm):
                        if x == j:
                            if i < p:   # a factor from one of the squared sums contributes exponent 2
                                c[j - 1] += 2
                            else:       # a factor from one of the plain sums contributes exponent 1
                                c[j - 1] += 1
                if 1 in c:  # a lone first power has zero expectation
                    continue
                e = 1
                for j in range(0, M):
                    if c[j] > 0:
                        e *= mu[c[j]]
                total += nCs * sigma * e
    return total
Example 1: $E\left(\sum_{i=1}^n (X_i-\mu)^2\right) = n Var(X_i)$
ESX2pSXq(n, 1, 0)
Example 2: $E\left[\left(\sum_{i=1}^n (X_i-\mu)\right)^2\right] = n^2 Var(\bar X)$
ESX2pSXq(n, 0, 2)
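A further check, against the classical identity $E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^2\right] = n\mu_4 + n(n-1)\mu_2^2$: the difference below should simplify to zero.
# p = 2, q = 0 against the classical identity; expect 0
simplify(expand_func(ESX2pSXq(n, 2, 0)) - (n*mu[4] + n*(n - 1)*mu[2]**2))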
We are ready to compute the moments of sample variance $V_n=S_n^2$ because
$$ \begin{align*} E(V_n^k) &= E\left[\left( \frac{1}{n-1}\sum_{i=1}^n (X_i-\bar X)^2 \right)^k\right] \\ &= E\left[\left( \frac{1}{n-1} \left( \sum_{i=1}^n (X_i-\mu)^2 - \sum_{i=1}^n(\bar X - \mu)^2 \right) \right)^k\right] \\ &= \left( \frac{1}{n-1} \right)^k \sum_{r=0}^k \binom{k}{r} (-1)^{k-r} E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^r \left(\sum_{i=1}^n (\bar X -\mu)^2\right)^{k-r} \right] \\ &= \left( \frac{1}{n-1} \right)^k \sum_{r=0}^k \binom{k}{r} (-1)^{k-r} n^{r-k} E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^r \left(\sum_{i=1}^n (X_i-\mu)\right)^{2(k-r)} \right] \end{align*} $$
def momentV(n, k, mu = IndexedBase("mu")):
    if k == 0:
        return 1
    total = 0
    for r in range(k + 1):
        total += binomial(k, r) * n**r * (-1)**(k-r) * ESX2pSXq(n, r, 2*(k-r), mu)
    return total / n**k / (n-1)**k
m = [simplify(momentV(n, j)) for j in range(0, 4)]
m
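Two consistency checks on these moments: $E(V_n) = \mu_2$ (the sample variance is unbiased), and the classical formula $Var(V_n) = \frac{\mu_4}{n} - \frac{(n-3)\,\mu_2^2}{n(n-1)}$. Both differences below should simplify to zero.
simplify(m[1] - mu[2])  # unbiasedness of V_n
simplify(m[2] - m[1]**2 - (mu[4]/n - (n - 3)*mu[2]**2/(n*(n - 1))))  # Var(V_n)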
Similar to what we did in the previous post, we consider the Taylor expansion of $g(x) = \sqrt{x}$.
x = Symbol("x")
sigma = Symbol("sigma", positive = True)
lam = IndexedBase("lambda")
g = sqrt(x).series(x, x0=sigma**2, n=4)
g
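As a quick check that this is the cubic Taylor polynomial around $x_0 = \sigma^2$, rebuild it from the derivatives of $\sqrt{x}$ (the name `g_check` is introduced only for this check); the difference should simplify to zero.
g_check = sum(diff(sqrt(x), x, k).subs(x, sigma**2)/factorial(k) * (x - sigma**2)**k for k in range(4))
simplify(g_check - g.removeO())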
Now, we are ready to compute $E(S_n) = Eg(S_n^2)$ in terms of the scaled central moments $\lambda_k = \mu_k / \sigma^k$.
Note that $\gamma = \lambda_3$ and $\kappa = \lambda_4$ are usually called skewness and kurtosis.
g_coeffs = list(reversed(g.removeO().as_poly(x).all_coeffs()))
g_coeffs
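Since `all_coeffs` lists coefficients from the highest power down, the `reversed` call makes `g_coeffs[i]` the coefficient of $x^i$; reassembling the polynomial should recover the cubic.
simplify(sum(g_coeffs[i]*x**i for i in range(4)) - g.removeO())  # expect 0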
ES = sum([g_coeffs[i] * m[i] for i in range(0, 4)]).subs({
mu[2]: sigma**2,
mu[3]: lam[3] * sigma**3,
mu[4]: lam[4] * sigma**4,
mu[6]: lam[6] * sigma**6})
ES.simplify()
We then expand it in terms of $1/n$.
w = Symbol("1/n")
ES.subs(n, 1/w).series(w, 0, 3)
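As a check against the expansion stated next, the coefficient of $w = 1/n$ should equal $-\frac{\sigma}{8}(\lambda_4 - 1)$; the difference below should simplify to zero.
simplify(ES.subs(n, 1/w).series(w, 0, 3).removeO().coeff(w, 1) + sigma*(lam[4] - 1)/8)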
Finally, we get
$$ E(S_n) = \sigma - \frac{\sigma}{8n}(\lambda_4 - 1) + \frac{\sigma}{16 n^2}(\lambda_6 - 3 \lambda_4 - 6 \lambda_3^2 - 2) + o(n^{-2}). $$
Since $E(S_n^2) = E(V_n) = \sigma^2$, the mean squared error of $S_n$ satisfies
$$ MSE(S_n) = E(S_n - \sigma)^2 = E(S_n^2) - 2 \sigma E(S_n) + \sigma^2 = 2 \sigma^2 - 2 \sigma E(S_n). $$
MSES = 2*sigma**2 - 2*sigma*ES
MSES.subs(n, 1/w).series(w, 0, 3)
Therefore $$ MSE(S_n) = \frac{\sigma^2}{4n}(\lambda_4 - 1) - \frac{\sigma^2}{8 n^2}(\lambda_6 - 3 \lambda_4 - 6 \lambda_3^2 - 2) + o(n^{-2}). $$
We compare this with the MSE of $\hat \sigma_n = \sqrt{\frac{\sum(X_i - \bar X)^2}{n}} = \sqrt{\frac{n-1}{n}}\, S_n$:
$$ MSE(\hat \sigma_n) = E\left[\left(\sqrt{\frac{n-1}{n}}\, S_n - \sigma\right)^2\right] = \frac{n-1}{n} E(S_n^2) - 2 \sqrt{\frac{n-1}{n}}\,\sigma E(S_n) + \sigma^2. $$
MSEsigmahat = (n-1)/n * sigma**2 - 2 * sigma * sqrt((n-1)/n) * ES + sigma**2
MSEsigmahat.subs(n, 1/w).series(w, 0, 3)
which gives
$$ MSE(\hat \sigma_n) = \frac{\sigma^2}{4n}(\lambda_4 - 1) - \frac{\sigma^2}{8 n^2}(\lambda_6 - 2\lambda_4 - 6 \lambda_3^2 - 5) + o(n^{-2}). $$
Finally, the coefficient of $n^{-2}$ in $MSE(S_n) - MSE(\hat \sigma_n)$, which by subtracting the two expansions above equals $\frac{\sigma^2}{8}(\lambda_4 - 3)$:
(MSES - MSEsigmahat).subs(n, 1/w).series(w, 0, 3).coeff(w**2)