* DISCLAIMER: The following code is provided as is and should be used at your own risk. * No user support is available. * Andrew M Jones, November 2006 * This is the program to estimate the models described in the book * "Applied Econometrics for Health Economists, 2nd Edition" * using Stata. The code is written in a general format with * the dependent variables called yvar (created using "gen yvar = ……") * and the list of independent variables called $xvars (created using * "global xvars "male age….""). The estimation sample for each model * can be selected using the "if" command e.g., observations from the * first wave of HALS can be selected by "if wave==1". /* STATA PROGRAM FOR ANALYSIS OF THE HEALTH AND LIFESTYLE SURVEY */ /* CHAPTER 2: SIMPLE DESCRIPTIVE STATISTICS */ /* LOAD THE STATA DATASET */ use "c:\....\...\your_filename.dta", clear /* CREATE A LOG FILE TO SAVE THE OUTPUT */ log using "c:\...\...\your_filename.log", replace /* CREATE GLOBAL FOR LIST OF VARIABLES TO BE USED IN MODELS */ global xvars "male age age2 age3 ethbawi ethipb ethothnw part unemp retd stdnt keephse lsch14u lsch14 lsch15 lsch17 lsch18 lsch19 regsc1s regsc2 regsc3n regsc4 regsc5n widow single seprd divorce partime retired student keephouse" /* DESCRIPTIVE STATISTICS */ summ $xvars /* SOME DESCRIPTIVE ANALYSIS OF NON-RESPONSE */ gen yvar = sah quietly regr yvar $xvars gen miss=0 recode miss 0=1 if e(sample) sort miss by miss: summ $xvars /* CHAPTER 3: BINARY CHOICES */ /* SELF-ASSESSED HEALTH */ * LINEAR PROBABILITY MODEL (OLS & WLS) regress yvar $xvars, robust predict yf * SAVE COEFFICIENTS matrix blpm=e(b) matrix list blpm scalar bun_lpm=_b[unemp] scalar list bun_lpm * WEIGHTED LEAST SQUARES gen wt=1/(yf*(1-yf)) regress yvar $xvars [aweight=wt] * RESET TEST gen yf2=yf^2 quietly regress yvar $xvars yf2, robust test yf2=0 drop wt yf yf2 * PROBIT MODEL probit yvar $xvars predict yf, xb * PARTIAL EFFECTS (AT MEANS) dprobit yvar $xvars * SAVE COEFFICIENTS matrix bpbt=e(b) matrix list bpbt scalar bun_pbt=_b[unemp] scalar bun_pbt18=_b[unemp]*1.8 scalar bun_pbt16=_b[unemp]*1.6 scalar list bun_pbt bun_pbt18 bun_pbt16 scalar bm_pbt=_b[male] gen mepbt_male = bm_pbt*normden(yf) * MARGINAL EFFECTS gen mepbt_unemp=bun_pbt*normden(yf) * AVERAGE EFFECTS gen aepbt_unemp=0 replace aepbt_unemp=norm(yf+bun_pbt)-norm(yf) if unemp==0 replace aepbt_unemp=norm(yf)-norm(yf-bun_pbt) if unemp==1 summ mepbt_unemp aepbt_unemp * RESET TEST gen yf2=yf^2 quietly probit yvar $xvars yf2 test yf2=0 drop yf yf2 * LOGIT MODEL logit yvar $xvars mfx compute if e(sample) predict yf, xb * SAVE COEFFICIENTS matrix blgt=e(b) matrix list blgt scalar bun_lgt=_b[unemp] scalar list bun_lgt bun_pbt18 bun_pbt16 * MARGINAL EFFECTS gen melgt_unemp=bun_lgt*( exp(yf)/(1+exp(yf)))*(1- exp(yf)/(1+exp(yf))) * AVERAGE EFFECTS gen aelgt_unemp=0 replace aelgt_unemp=exp(yf+bun_lgt)/(1+exp(yf+bun_lgt))-exp(yf)/(1+exp(yf)) if unemp==0 replace aelgt_unemp=exp(yf)/(1+exp(yf))-exp(yf-bun_lgt)/(1+exp(yf-bun_lgt)) if unemp==1 summ mepbt_unemp aepbt_unemp melgt_unemp aelgt_unemp scalar list bun_lpm * RESET TEST gen yf2=yf^2 quietly logit yvar $xvars yf2 test yf2=0 drop yf yf2 /* CHAPTER 4: ORDERED PROBIT MODEL */ replace yvar=saho oprobit yvar $xvars, table predict yhat predict yf, xb gen yf2=yf^2 * PARTIAL EFFECTS FOR P(Y=0) mfx compute, predict(outcome(0)) scalar mu1=_b[_cut1] scalar bunemp=_b[unemp] gen aeop_unemp=0 replace aeop_unemp=norm(mu1-yf-bunemp)-norm(mu1-yf) if unemp==0 replace aeop_unemp=norm(mu1-yf)-norm(mu1-yf+bunemp) if unemp==1 summ aeop_unemp hist aeop_unemp * RESET test quietly oprobit yvar $xvars yf2 test yf2=0 drop yf yf2 /* CHAPTER 5: MULTINOMIAL LOGIT MODEL */ * FOR HEALTH CARE USE gen hosp=hospop==1 | hospip==1 tab visitgp hosp gen use = 0 replace use=1 if visitgp==1 & hosp==0 replace use=2 if hosp==1 replace use=. if visitgp==. tab use * MULTINOMIAL LOGIT replace yvar=use mlogit yvar $xvars * Hausman test of IIA est store hall mlogit yvar $zvars if yvar!=2 est store hpartial hausman hpartial hall, alleqs constant /* CHAPTER 6: BIVARIATE PROBIT MODEL */ gen yvar1=regfag gen yvar2=sah biprobit yvar1 yvar2 $xvars predict yf1, xb1 predict yf2, xb2 drop yf1 yf2 * PARTIAL EFFECT ON MARGINAL DISTRIBUTION scalar bun_pbt=_b[yvar:unemp] gen aepbt_unemp=0 replace aepbt_unemp=norm(yf1+bun_pbt)-norm(yf1) if unemp==0 replace aepbt_unemp=norm(yf1)-norm(yf1-bun_pbt) if unemp==1 summ aepbt_unemp hist aepbt_unemp /* CHAPTER 7: SELECTION BIAS */ * Sample Selection models (SSM) (Heckman selection model/Generalised Tobit model) * Heckman maximum likelihood estimates (FIML) heckman lncig $xvars, select($xvars) * Heckman two step consistent estimates heckman lncig $xvars, select($xvars) twostep mills(imr)¨ probit regfag $xvars predict yfp, xb regre imr $quaneq twoway scatter imr yfp /* CHAPTER 8: THE EVALUATION PROBLEM */ * LINEAR OUTCOME (y2) BINARY TREATMENT (y1) * THE EVALUATION PROBLEM replace yvar2=hyfev1 replace yvar1=regfag * "SELECTION ON OBSERVABLES" APPROACHES:- * i. STANDARD PROBIT MODEL regress yvar2 yvar1 $xvars * ii. INVERSE PROBABILITY WEIGHTED ESTIMATOR probit yvar1 $xvars predict pi, p gen ipw = 1 replace ipw =1/pi if yvar1 == 1 replace ipw=1/(1-pi) if yvar2 == 0 summ ipw regress yvar2 yvar1 [pweight=ipw] * iii. PROPENSITY SCORE MATCHING (DEFAULT OPTION) psmatch2 yvar1 $xvars, out(yvar2) * "SELECTION ON UNOBSERVABLES" APPROACHES:- * HECKMAN TREATMENT EFFECTS MODEL regr yvar2 yvar1 $zvars treatreg yvar2 $xvars, treat(yvar1 = $zvars) twostep treatreg yvar2 $xvars, treat(yvar1 = $zvars) * INSTRUMENTAL VARIABLES ESTIMATOR ivreg yvar2 $xvars (yvar1 = $zvars) * BINARY OUTCOME (y2) BINARY TREATMENT (y1) *RECURSIVE BIVARIATE PROBIT MODEL probit yvar2 yvar1 $xvars dprobit yvar2 yvar1 $xvars biprobit (yvar2=yvar1 $xvars) (yvar1=$xvars) predict yf1, xb1 predict yf2, xb2 * AVERAGE TREATMENT EFFECT (ATE) scalar b1_pbt=_b[yvar1] scalar rho=_b[athrho:_cons] gen ate=0 replace ate=norm(yf1+b1_pbt)-norm(yf1) if yvar1==0 replace ate=norm(yf1)-norm(yf1-b1_pbt) if yvar1==1 summ ate hist ate * AVERAGE TREATMENT EFFECT ON THE TREATED (ATET) gen atet=0 replace atet=norm((yf1+b1_pbt-rho*yf2)/(1-rho^2)^0.5) - norm((yf1-rho*yf2)/(1-rho^2)^0.5) if yvar1==0 replace atet= norm((yf1-rho*yf2)/(1-rho^2)^0.5) - norm((yf1-b1_pbt-rho*yf2)/(1-rho^2)^0.5) if yvar1==1 summ atet if yvar1==1 hist atet if yvar1==1 drop yf1 yf2 ate atet /* CHAPTER 9: COUNT DATA REGRESSSIONS */ replace yvar=fagday * POISSON REGRESSION poisson yvar $xvars * predict exp(xb) predict fitted, n predict yf, xb gen yf2=yf^2 *PARTIAL EFFECTS scalar bunemp=_b[unemp] gen ae_unemp=0 replace ae_unemp=exp(yf+bunemp)-exp(yf) if unemp==0 replace ae_unemp=exp(yf)-exp(yf-bunemp) if unemp==1 summ ae_unemp hist ae_unemp scalar drop bunemp drop ae_unemp * TABULATE ACTUAL AND FITTED VALUES OF Y replace fitted=round(fitted) tab yvar tab fitted tab fitted yvar * RESET TEST quietly poisson yvar $xvars yf2 test yf2 * Pseudo-ML - robust standard errors poisson yvar $xvars, robust drop fitted yf * NEGBIN REGRESSION (NEGBIN2) nbreg yvar $xvars predict yf, xb predict fitted *PARTIAL EFFECTS scalar bunemp=_b[unemp] gen ae_unemp=0 replace ae_unemp=exp(yf+bunemp)-exp(yf) if unemp==0 replace ae_unemp=exp(yf)-exp(yf-bunemp) if unemp==1 summ ae_unemp hist ae_unemp scalar drop bunemp drop ae_unemp replace fitted=round(fitted) tab fitted yvar drop fitted * GENERALISED NEGBIN ln(a)=zd set matsize 100 gnbreg yvar $xvars, lna($xvars) predict fitted replace fitted=round(fitted) tab fitted yvar drop fitted * ZERO-INFLATED POISSON AND NEGBIN MODELS zip yvar $xvars, inflate(_cons) vuong predict fitted predict yf replace fitted=round(fitted) tab fitted yvar drop fitted *PARTIAL EFFECTS FOR ZIP scalar bunemp=_b[unemp] scalar qi=_b[inflate:_cons] scalar qi=exp(qi)/(1+exp(qi)) scalar list qi gen ae_unemp=0 replace ae_unemp=(1-qi)*(exp(yf+bunemp)-exp(yf)) if unemp==0 replace ae_unemp=(1-qi)*(exp(yf)-exp(yf-bunemp)) if unemp==1 summ ae_unemp hist ae_unemp scalar drop bunemp drop ae_unemp zip yvar $xvars, inflate($xvars _cons) predict pi, p predict fitted replace fitted=round(fitted) tab fitted yvar drop fitted /*zinb yvar $xvars, inflate(_cons) vuong predict fitted replace fitted=round(fitted) tab fitted yvar drop fitted zinb yvar $xvars, inflate($xvars|_cons) vuong predict fitted replace fitted=round(fitted) tab fitted yvar drop fitted*/ drop pi * HURDLE MODELS replace yvar1=regfag logit yvar1 $xvars predict yf1, xb predict pi, p ztp yvar $xvars ztnb yvar $xvars /* CHAPTER 9: DURATION ANALYSIS */ /*SURVIVAL TIME is LIFESPAN if the ENTRY DATE is the DATE OF BIRTH and the EXIT DATE is June 2005*/ /*LIFESPAN is LEFT TRUNCATED: DURATION IS OBSERVED ONLY FOR THOSE WHO SURVIVED UP TO THE INTERVIEW DATE*/ stset lifespan, failure(death) id(serno) time0(age) stsum stdes /***PLOT HAZARD AND SURVIVAL FUNCTIONS***/ sts graph, na title("NA ls") saving(lsNA, replace) sts graph, hazard title("KM hazard ls") saving(lsHkm, replace) sts graph, title("KM Survival ls") saving(lsKMsurv, replace) /* Cox PH model */ stcox $xls /*** WEIBULL MODEL***/ streg $xls, d(weibull) nolog /*PH version */ streg $xls, d(weibull) nolog time /*AFT Version*/ stcurve, survival title("Ls survival") saving(lssurv, replace) stcurve, cumh title("Ls cumh") saving(lscumh, replace) stcurve, hazard title("Ls hazard") saving(lshaz, replace) /* CHAPTER 10: PANEL DATA */ * SET INDIVIDUAL (i) AND TIME (t) INDEXES iis serno tis wave sort serno wave /* THE FOLLOWING COMMANDS CREATE INDICATORS OF WHETHER OBSERVATIONS ARE IN THE BALANCED AND UNBALANCED ESTIMATION SAMPLES */ Replace yvar=fagday quietly regr yvar $xvars, robust cluster(pid) gen insampm = 0 recode insampm 0 = 1 if e(sample) sort serno wave gen constant = 1 by serno: egen Ti = sum(constant) if insampm == 1 drop constant sort wave by serno: gen nextwavem = insampm[_n+1] gen allwavesm = . recode allwavesm . = 0 if Ti ~= 8 recode allwavesm . = 1 if Ti == 8 gen numwavesm = . replace numwavesm = Ti * LIST OF Xit PLUS MUNDLAK SPECIFICATION by serno: egen munemp=mean(unemp) etc… global xvarsm "…" /* LINEAR PANEL DATA MODELS */ * SUMMARY STATISTICS - UNBALANCED SAMPLE xtsum yvar $xvars * POOLED REGRESSION - UNBALANCED SAMPLE regr yvar $xvars * WITH ROBUST & CLUSTER TO ALLOW FOR REPEATED OBSERVATIONS regr yvar $xvars, robust cluster(serno) * MUNDLAK WITH ROBUST & CLUSTER TO ALLOW FOR REPEATED OBSERVATIONS regr yvar $xvarsm, robust cluster(serno) * PANEL DATA REGRESSIONS - UNBALANCED SAMPLE * RANDOM EFFECTS MODEL (RE) xtreg yvar $xvars, re * LM TEST FOR SIGNIFICANCE OF INDIVIDUAL EFFECTS xttest0 * HAUSMAN TEST FOR RE V. FE COEFFICIENTS xthaus * RANDOM EFFECTS MODEL (RE) xtreg yvar $xvars if allwavesm==1, re * LM TEST FOR SIGNIFICANCE OF INDIVIDUAL EFFECTS xttest0 * HAUSMAN TEST FOR RE V. FE COEFFICIENTS xthaus * RANDOM EFFECTS MODEL (RE) WITH MUNDLAK xtreg yvar $xvarsm, re xthaus xtreg yvar $xvarsm if allwavesm==1, re xthaus * LEAST SQUARES DUMMY VARIABLE REGRESSION (LSDV) global zvars " male etc…" areg yvar $xvars, absorb(serno) predict ai, d regress ai $zvars if wavenum==1 * FIXED EFFECTS MODEL (FE) xtreg yvar $xvars, fe xtreg yvar $xvars if allwavesm==1, fe * BETWEEN EFFECTS MODEL (BE) xtreg yvar $xvars, be /* NONLINEAR PANEL DATA MODELS */ replace yvar=sah * SUMMARY STATISTICS xtsum yvar $xvars xtsum yvar $xvars if allwavesm==1 * POOLED PROBIT - DPROBIT USED TO OBTAIN APEs dprobit yvar $xvars dprobit yvar $xvars if allwavesm==1 * USING ROBUST INFERENCE TO ALLOW FOR CLUSTERING WITHIN "i" dprobit yvar $xvars, robust cluster(serno) dprobit yvar $xvars if allwavesm==1, robust cluster(serno) * PANEL RE PROBIT xtprobit yvar $xvars quadchk xtprobit yvar $xvars if allwavesm==1 quadchk * RANDOM EFFECTS MODEL (RE) WITH MUNDLAK xtprobit yvar $xvarsm quadchk xtprobit yvar $xvarsm if allwavesm==1 quadchk * CONDITIONAL ("FIXED EFFECTS") LOGIT MODEL (FE) clogit yvar $xvars, group(serno) clogit yvar $xvars if allwavesm==1, group(serno)