Code
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt
JW Tsai
July 16, 2023
為了之後還可以跑這個模型,根據原本的資料重新模擬一筆資料。
(但這邊沒考慮到原本數值之間的相關情形) 已考慮進去!!用多元常態分配模擬了。
'\n[dat.acd1_eap.mean(), dat.scl_eap.mean(),dat.c1.mean(),dat.c2.mean(),dat.c3.mean()]\nnp.cov([dat.acd1_eap, dat.scl_eap, dat.c1, dat.c2, dat.c3])\n'
dat_mn = np.array(
[2.8510193121856177e-05,
1.9250494837216173e-06,
0.19822282980177716,
0.11483253588516747,
0.11551606288448393]
)
dat_cov = np.array([
[ 0.7959285 , 0.16304568, -0.01541732, -0.01689872, -0.02973076],
[ 0.16304568, 0.51185381, -0.05390401, -0.00531492, -0.02327021],
[-0.01541732, -0.05390401, 0.15903925, -0.022778 , -0.02291358],
[-0.01689872, -0.00531492, -0.022778 , 0.10171555, -0.01327408],
[-0.02973076, -0.02327021, -0.02291358, -0.01327408, 0.10224199]
])
dat = np.random.multivariate_normal(dat_mn, dat_cov, 1000)
dat = pd.DataFrame(dat, columns=['acd1_eap', 'scl_eap', 'c1', 'c2', 'c3'])
dat['c1'] = dat['c1'] > 0.5
dat['c2'] = dat['c2'] > 0.5
dat['c3'] = dat['c3'] > 0.5
acd1_eap | scl_eap | c1 | c2 | c3 | |
---|---|---|---|---|---|
0 | 0.346316 | -0.792151 | True | False | False |
1 | -0.305374 | 1.621382 | False | True | False |
2 | 0.563012 | 0.828286 | False | False | False |
3 | 0.058392 | -0.276411 | False | False | False |
4 | 0.181640 | -1.291429 | False | False | False |
... | ... | ... | ... | ... | ... |
995 | 0.717853 | -0.833434 | True | False | False |
996 | -0.812274 | -0.038150 | False | False | False |
997 | -0.461906 | -0.635552 | False | False | False |
998 | 0.294100 | 0.594206 | False | False | False |
999 | -0.543131 | -0.928300 | True | False | False |
1000 rows × 5 columns
'''
dat_dict = {
'acd1_eap': np.random.normal(loc=2.8510193121856177e-05, scale=0.8921482479092293, size=1000),
'scl_eap': np.random.normal(loc=1.9250494837216173e-06, scale=0.7154395945457549, size=1000),
'c1': np.random.binomial(n=1, p=0.19822282980177716, size=1000),
'c2': np.random.binomial(n=1, p=0.11483253588516747, size=1000),
'c3': np.random.binomial(n=1, p=0.11551606288448393, size=1000),
}
dat = pd.DataFrame(dat_dict)
'''
"\ndat_dict = {\n 'acd1_eap': np.random.normal(loc=2.8510193121856177e-05, scale=0.8921482479092293, size=1000),\n 'scl_eap': np.random.normal(loc=1.9250494837216173e-06, scale=0.7154395945457549, size=1000),\n 'c1': np.random.binomial(n=1, p=0.19822282980177716, size=1000),\n 'c2': np.random.binomial(n=1, p=0.11483253588516747, size=1000),\n 'c3': np.random.binomial(n=1, p=0.11551606288448393, size=1000), \n}\ndat = pd.DataFrame(dat_dict)\n"
with pm.Model() as model:
acd1eap = pm.ConstantData('acd1eap', dat.acd1_eap)
scleap = pm.ConstantData('scleap', dat.scl_eap)
c1 = pm.ConstantData('c1', dat.c1)
c2 = pm.ConstantData('c2', dat.c2)
c3 = pm.ConstantData('c3', dat.c3)
# intercept
acd1eap_Intercept = pm.Normal('acd1eap_Intercept', mu=0, sigma=100)
scleap_Intercept = pm.Normal('scleap_Intercept', mu=0, sigma=100)
# noise
acd1eap_Sigma = pm.HalfCauchy("acd1eap_Sigma", 1)
scleap_Sigma = pm.HalfCauchy("scleap_Sigma", 1)
# slope
acd1eap_scleap = pm.Normal('acd1eap_scleap', mu=0, sigma=100)
acd1eap_c1 = pm.Normal('acd1eap_c1', mu=0, sigma=100)
acd1eap_c2 = pm.Normal('acd1eap_c2', mu=0, sigma=100)
acd1eap_c3 = pm.Normal('acd1eap_c3', mu=0, sigma=100)
scleap_c1 = pm.Normal('scleap_c1', mu=0, sigma=100)
scleap_c2 = pm.Normal('scleap_c2', mu=0, sigma=100)
scleap_c3 = pm.Normal('scleap_c3', mu=0, sigma=100)
# likelihood
pm.Normal("y_likelihood", mu=acd1eap_Intercept + acd1eap_scleap * scleap + acd1eap_c1 * c1 + acd1eap_c2 * c2 + acd1eap_c3 * c3, sigma = acd1eap_Sigma, observed = acd1eap )
pm.Normal('m_likelihood', mu=scleap_Intercept + scleap_c1 * c1 + scleap_c2 * c2 + scleap_c3 * c3, sigma = scleap_Sigma, observed = scleap)
trace_med = pm.sample(2000, chains=4, cores=4)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [acd1eap_Intercept, scleap_Intercept, acd1eap_Sigma, scleap_Sigma, acd1eap_scleap, acd1eap_c1, acd1eap_c2, acd1eap_c3, scleap_c1, scleap_c2, scleap_c3]
Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.
ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH
<graphviz.graphs.Digraph at 0x17209fb80>
mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
acd1eap_Intercept | 0.042 | 0.036 | -0.025 | 0.109 | 0.000 | 0.000 | 9687.0 | 6732.0 | 1.0 |
scleap_Intercept | 0.045 | 0.030 | -0.013 | 0.102 | 0.000 | 0.000 | 10276.0 | 6795.0 | 1.0 |
acd1eap_scleap | 0.374 | 0.038 | 0.305 | 0.445 | 0.000 | 0.000 | 13151.0 | 6209.0 | 1.0 |
acd1eap_c1 | 0.002 | 0.067 | -0.126 | 0.122 | 0.001 | 0.001 | 11284.0 | 6502.0 | 1.0 |
acd1eap_c2 | -0.008 | 0.085 | -0.169 | 0.148 | 0.001 | 0.001 | 12240.0 | 6047.0 | 1.0 |
acd1eap_c3 | -0.049 | 0.088 | -0.210 | 0.120 | 0.001 | 0.001 | 12138.0 | 5986.0 | 1.0 |
scleap_c1 | -0.181 | 0.055 | -0.285 | -0.081 | 0.001 | 0.000 | 11615.0 | 6883.0 | 1.0 |
scleap_c2 | 0.031 | 0.071 | -0.091 | 0.173 | 0.001 | 0.001 | 11680.0 | 7188.0 | 1.0 |
scleap_c3 | -0.076 | 0.073 | -0.225 | 0.053 | 0.001 | 0.001 | 12466.0 | 6045.0 | 1.0 |
acd1eap_Sigma | 0.881 | 0.020 | 0.845 | 0.919 | 0.000 | 0.000 | 14248.0 | 6463.0 | 1.0 |
scleap_Sigma | 0.734 | 0.017 | 0.703 | 0.765 | 0.000 | 0.000 | 12699.0 | 6039.0 | 1.0 |
@online{tsai2023,
author = {Tsai, JW},
title = {Note0716 (Mediation Analysis with Pymc)},
date = {2023-07-16},
langid = {en}
}