* DISCLAIMER: The following code is provided as is and should be used at your own risk. * No user support is available. * Silvia Balia, November 2006 * This program relates to the material described in Chapter 5 of the book * Jones, A.M., Rice, N., Bago d'Uva, T. & Balia, S. (2007) * "Applied Health Economics", London: Routledge {ISBN: 9780415397728} capture log close capture clear version 9 set memory 50m set maxvar 8000 set matsize 10000 log using sb_mvp.log, replace use HALS_APPHEC_mvp.dta, clear set more off ***DESCRIPTIVE STATISTIC*** /*generate global for variable of interest*/ global vars "death sah nsmoker breakfast sleepgd alqprud nobese exercise sc12 sc3 sc45 lhqdg lhqhndA lhqO lhqnone lhqoth married widow divorce seprd single full part unemp sick retd keephse wkshft1 wales north nwest yorks wmids emids anglia swest london scot rural suburb ethwheur male height age age2 housown hou smother mothsmo fathsmo bothsmo alpa alma" /***EXERCISE 1: DESCRIBE THE DATASET AND EXPLORE THE CHARACTERISTICS OF THE SURVEY SAMPLE and SAY SOMETHING ABOUT THE FLAGGING STATUS***/ describe $vars summarize $vars /*Flagging status*/ tab flagcode tab death ***LIFESTYLE AND SOCIO-ECONOMIC STATUS*** /***EXERCISE 2: CALCULATE PARTIAL CORRELATION BETWEEN LIFESTYLES AND SOCIO-ECONOMIC VARIABLES***/ /*PARTIAL CORRELATION ANALYSIS AND PAIRWISE CORRELATION****/ global lifestyles "nsmoker breakfast sleepgd alqprud nobese exercise" foreach x of global lifestyles{ pcorr `x' sc12 sc45 } foreach x of global lifestyles{ pcorr `x' lhqdg lhqhndA lhqoth lhqnone } /***EXERCISE 3: COMPARE THE CHARACTERISTICS OF EACH SUBSAMPLE, i.e. 0 to 2 healthy lifestyles, 3 to 5 and 6***/ /*SAMPLE MEANS for GROUPS OF HEALTHY BEHAVIOURS*/ gen ls1=nsmoker gen ls2=breakfast gen ls3=sleepgd gen ls4=alqprud gen ls5=nobese gen ls6=exercise summ ls1-ls6 egen sumls=rsum(ls1-ls6) summ sumls global zero "sumls==0" global one "sumls==1" global two "sumls==2" global three "sumls==3" global four "sumls==4" global five "sumls==5" global six "sumls==6" global subvars "death sah sc12 sc3 sc45 lhqdg lhqhndA lhqO lhqnone lhqoth full part unemp sick retd keephse wkshft1 male age" sum $subvars if $zero|$one|$two sum $subvars if $three|$four|$five sum $subvars if $six /***EXERCISE 4: CALCULATE PARTIAL CORRELATIONS BETWEEN MORTALITY (and HEALTH) and LIFESTYLES***/ ***HEALTH AND LIFESTYLE*** pcorr death $lifestyles pcorr sah $lifestyles /***EXERCISE 5: CALCULATE THE PEARSON CHI-SQUARED TEST OF INDEPENDECE BETWEEN MORTALITY (and HEALTH) and LIFESTYLES***/ foreach x of global lifestyles{ tab `x' death , chi2 } foreach x of global lifestyles{ tab `x' sah, chi2 } ***DEATHS AND SOCIO-ECONOMIC STATUS*** /*generate 19 dummy variables equals to 1 if the underlying cause of death is in one particular class of diseases*/ icd9 clean ucause, d p icd9 generate u1=ucause, range(001/139) label var u1 "infectious and parastic dis" icd9 generate u2=ucause, range(140/239) label var u2 "neoplasms" icd9 generate u3=ucause, range(240/279) label var u3 "endocrine, nutritional and metabolic dis and immunity disorders" icd9 generate u4=ucause, range(280/289) label var u4 "dis of the blood and blood-forming organs" icd9 generate u5=ucause, range(290/319) label var u5 " menatal disorder" icd9 generate u6=ucause, range(320/389) label var u6 "dis of the nervous sustem and sense organs" icd9 generate u7=ucause, range(390/459) label var u7 "dis of the circulatory system" icd9 generate u8=ucause, range(460/519) label var u8 "dis of the respiratory system" icd9 generate u9=ucause, range(520/579) label var u9 "dis of the digestive system" icd9 generate u10=ucause, range(580/629) label var u10 "dis of the genitourinary system" icd9 generate u11=ucause, range(630/679) label var u11 "complications of pregnancy, childbirth and the puerperium" icd9 generate u12=ucause, range(680/709) label var u12 "dis of the skin and subcutaneous tissue" icd9 generate u13=ucause, range(710/739) label var u13 "dis of the musculoskeletal system and connective tissue" icd9 generate u14=ucause, range(740/759) label var u14 "congenital anomalies" icd9 generate u15=ucause, range(760/779) label var u15 "certain conditions originating in the perinatal period" icd9 generate u16=ucause, range(780/799) label var u16 "symptoms, signs, and ill-defined conditions" icd9 generate u17=ucause, range(800/999) label var u17 "injury and poisoning" icd9 generate u18=ucause, range(E800/E999) label var u18 "supplementary classification of external causes of injury and poisoning" icd9 generate u19=ucause, range(V01/V83) label var u19 "supplementary classification of factors influencing health status and contact with health services" gen cd=0 /*cd=0 if missing cause*/ replace cd=1 if u1==1 replace cd=2 if u2==1 replace cd=3 if u3==1 replace cd=4 if u4==1 replace cd=5 if u5==1 replace cd=6 if u6==1 replace cd=7 if u7==1 replace cd=8 if u8==1 replace cd=9 if u9==1 replace cd=10 if u10==1 replace cd=11 if u11==1 replace cd=13 if u12==1 replace cd=14 if u14==1 replace cd=15 if u15==1 replace cd=16 if u16==1 replace cd=17 if u17==1 replace cd=18 if u18==1 replace cd=19 if u19==1 /*CAUSES OF DEATH AND PERCENTAGE IN THE SAMPLE OF DEATHS AND BY SOCIAL CLASS*/ /***EXERCISE 6: show the percentage of each cause of death and by social class (scgr)***/ tab cd if death==1 sort scgr by scgr: tab cd if death==1 /*DEATH RATES by social classes, education, gender and sah*/ global varlist "sc12 sc3 sc45 lhqdg lhqhndA lhqO lhqnone male sah" /***EXERCISE 7: CALCULATE THE DEATH RATE FOR EACH VARIABLE OF VARLIST***/ foreach x of global varlist{ qui count if `x'==1&death==1 scalar d`x'=r(N) disp "`x'=1 and death=1:d`x'= " d`x' qui count if `x'==1 scalar n`x'=r(N) disp "`x'=1:n`x'= " n`x' scalar drate`x'=(d`x'/n`x')*100 disp "death rate:d`x'/n`x'= " drate`x' } /* qui count if male==0&death==1 scalar dfem=r(N) qui count if male==0 scalar fem=r(N) scalar dratef=(dfem/fem)*100 disp dratef */ ***ECONOMETRIC MODELS*** /*DEFINE RIGHT HAND SIDE*/ global rhs0 "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth widow divorce seprd single part unemp sick retd keephse wkshft1 wales north nwest yorks wmids emids anglia swest london scot rural suburb ethwheur housown hou height male age age2 smother mothsmo fathsmo bothsmo" global rhs1 "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth part unemp sick retd keephse wkshft1 rural suburb ethwheur height male age age2" global rhs2 "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth widow divorce seprd single part unemp sick retd keephse wkshft1 wales north nwest yorks wmids emids anglia swest london scot rural suburb ethwheur housown hou height male age age2" global rhs3 "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth widow divorce seprd single part unemp sick retd keephse wkshft1 wales north nwest yorks wmids emids anglia swest london scot rural suburb ethwheur housown hou height male age age2 smother mothsmo fathsmo bothsmo" /***EXERCISE 8: COMPARE A PROBIT MODEL FOR MORTALITY WITH LIFESTYLES (EXOGENOUS MODEL) WITH A MODEL THAT EXCLUDES endogenous variable. USE GLOBAL rhs1 - exclusion restrictions approach***/ ***UNIVARIATE PROBIT MODEL 1, for mortality, with health and lifestyle*** probit death sah nsmoker $rhs1, nolog ***UNIVARIATE PROBIT MODEL 2, for mortality, without health and lifestyle*** probit death $rhs1, nolog /***EXERCISE 9: COMPARE THE EXCLUSION RESTRICTIONS APPROACH WITH THE NON-EXCLUSION RESTRICTIONS APPROACH: TEST FOR MISSPECIFACTION OF THE MODEL and CALCULATE AIC and BIC using ereturn list to save results; USE GLOBALS rhs1 and rhs0***/ ***UNIVARIATE PROBIT MODEL 1, Maddala's approach*** qui probit death sah nsmoker $rhs1 /*Information Criteria using fitstat; AIC=(Dev+2q)/N and BIC=Dev-(N-q)log(N), where Dev=-2logL*/ fitstat,saving(m1) /*hand calculation: AIC=-2logL+2q and BIC=-2logL+log(N)q*/ count scalar N=r(N) disp "N= " N ereturn list scalar logL=e(ll) disp "logL= " logL scalar q=e(df_m) +1 disp "q= " q scalar AIC=-2*logL+2*q disp "AIC= " AIC scalar BIC=-2*logL+log(N)*q disp "BIC= " BIC /*RESET test*/ qui probit death $rhs1 predict yhat, xb gen yhat2=yhat^2 qui probit death $rhs1 yhat2 test yhat2=0 drop yhat yhat2 ***UNIVARIATE PROBIT MODEL for mortality, Wilde's approach*** probit death sah nsmoker $rhs0, nolog /*Information Criteria using fitstat; AIC=(Dev+2q)/N and BIC=Dev-(N-q)log(N), where Dev=-2logL*/ fitstat,using(m1) /*hand calculation: AIC=-2logL+2q and BIC=-2logL+log(N)q*/ count scalar N=r(N) disp "N= " N scalar logL=e(ll) disp "logL= " logL scalar q=e(df_m) +1 disp "q= " q scalar AIC=-2*logL+2*q disp "AIC= " AIC scalar BIC=-2*logL+log(N)*q disp "BIC= " BIC /*RESET test*/ predict yhat, xb gen yhat2=yhat^2 qui probit death sah nsmoker $rhs0 yhat2/*,nolog*/ test yhat2=0 drop yhat yhat2 ***AVERAGE PARTIAL EFFECTS*** probit death sah nsmoker $rhs1,nolog dprobit death sah nsmoker $rhs1,nolog /*dprobit calculates by defaults partial effects at the means*/ /*this programmes calulate APE for each observation using the latent index (xb) and the probability of a non-zero dependent variable (p) for each observation*/ /*PARTIAL EFFECTS FOR CONTINUOUS VARIABLES*/ capture program drop meffcon program define meffcon version 9 args pred beta quietly { gen meffcon = (`beta')*(normden(`pred')) } summarize meffcon drop meffcon end /*AVERAGE EFFECTS FOR BINARY VARIABLES*/ capture program drop meffdum program define meffdum version 9 args pmarg pred beta covar quietly { gen meffdum = (`pmarg'-norm(`pred'-`beta')) replace meffdum= norm(`pred'+`beta')-(`pmarg') if (`covar')==0 } summarize meffdum drop meffdum end predict xb, xb predict pmarg, p global xcont "height age age2" global xdum "sah nsmoker sc12 sc45 lhqdg lhqhndA lhqnone lhqoth part unemp sick retd keephse wkshft1 rural suburb ethwheur male" foreach x of global xcont{ disp "`x'" meffcon xb _b[`x'] } foreach x of global xdum{ disp "`x'" meffdum pmarg xb _b[`x'] `x' } drop xb pmarg ***RECURSIVE BIVARIATE PROBIT MODEL for HEALTH and SMOKING*** probit sah nsmoker $rhs2,nolog biprobit (sah=nsmoker $rhs2) (nsmoker=$rhs3) ***AVERAGE TREATMENT EFFECT (ATE)*** /***EXERCISE 10: CALCULATE THE ATE of THE SMOKING VARIABLE on HEALTH. USE THE PROBIT LINEAR PREDICTION FOR EQUATION 1.***/ predict xb1, xb1 /*partial effect of P(y=1) = P(SAH=1) = PHI(x1b1) with respect to nsmoker*/ scalar bsmk=_b[nsmoker] gen ate=0 replace ate=norm(xb1+bsmk)-norm(xb1) if nsmoker==0 replace ate=norm(xb1)-norm(xb1-bsmk) if nsmoker==1 summ ate summ ate if nsmoker==1 *hist ate *hist ate if nsmoker==1 drop xb1 ate ***RECURSIVE MULTIVARIATE PROBIT MODEL for MORTALITY, HEALTH and SMOKING*** /*run time increases linearly in the number of random draws*/ /*** EXERCISE 11: ESTIMATE A TRIVARIATE PROBIT MODEL FOR MORTALITY, HEALTH AND SMOKING using THE NON EXCLUSION RESTRICTIONS APPROACH and CALCULATE AIC AND BIC***/ /*Wilde's approach: MVP0*/ mvprobit (death = sah nsmoker $rhs0) (sah = nsmoker $rhs0) (nsmoker = $rhs0) /*Statistical fit*/ scalar logLmvp0=e(ll) disp "logL= " logLmvp0 scalar q0=e(k) disp "q= " q0 scalar AIC=-2*logLmvp0+2*q0 disp "AIC= " AIC scalar BIC=-2*logLmvp0+log(N)*q0 disp "BIC= " BIC /*** EXERCISE 12: RE-ESTIMATE THE SAME MODEL USING EXCLUSION RESTRICTIONS, CALCULATE AIC, BIC and LR-TEST and COMPARE***/ /*Maddala's approach - exclusion restrictions: MVP1*/ mvprobit (death = sah nsmoker $rhs1) (sah = nsmoker $rhs2) (nsmoker = $rhs3)/*,dr(43)*/ /*Statistical fit*/ scalar logLmvp1=e(ll) disp "logL= " logLmvp1 scalar q1=e(k) disp "q= " q1 scalar AIC=-2*logLmvp1+2*q1 disp "AIC= " AIC scalar BIC=-2*logLmvp1+log(N)*q1 disp "BIC= " BIC /*LR test=2*(logL_unrestr- logL_restr): the unrestr model is the Wilde specification, the restr model is the Maddala specification*/ scalar testLR =2*(logLmvp0- logLmvp1) disp testLR scalar qLR= q0-q1 disp "q= " qLR disp chi2tail(qLR, testLR) /*calculate APE for the preferred specification*/ mvppred xb /*linear predictions*/ drop xb2 xb3 mvppred stdp, stdp /*standard error of the linear prediction (Xb) for each equation*/ drop stdp2 stdp3 mvppred pall, pall /*probit predicted joint probability of success in every outcome*/ mvppred pmarg, pmarg /*marginal probit predicted probability of success for each outcome*/ drop pmarg2 pmarg3 sum xb1 pmarg1 stdp1 pall1s pall0s foreach x of global xcont{ disp "`x'" meffcon xb _b[`x'] } foreach x of global xdum{ disp "`x'" meffdum pmarg xb _b[`x'] `x' } ***Testing endogeneity*** /*LR-test*/ qui probit death sah nsmoker $rhs1 scalar logL1=e(ll) qui probit sah nsmoker $rhs2 scalar logL2=e(ll) qui probit nsmoker $rhs3 scalar logL3=e(ll) scalar logL_restr=logL1+logL2+logL3 disp logL_restr scalar testLR =2*(logLmvp1 -logL_restr) disp testLR disp chi2tail(3, testLR) /***EXERCISE 13: TEST ENDOGENEITY OF THE SINGLE CORRELATION COEFFICIENTS: Z-TEST, AND COMMENT***/ log close