* DISCLAIMER: The following code is provided as is and should be used at your own risk. * No user support is available. * Silvia Balia, November 2006 * This program relates to the material described in Chapter 6 of the book * Jones, A.M., Rice, N., Bago d'Uva, T. & Balia, S. (2007) * "Applied Health Economics", London: Routledge {ISBN: 9780415397728} capture log close capture clear version 9 log using practical_duration.log, replace use hals_duration.dta, clear set more off ***DESCRIPTIVE STATISTIC*** global vars "birthmth birthday birthyr seenmth seenday seenyr age death deathage agestrt exfag exfagan regfag sc12 sc3 sc45 lhqdg lhqoth lhqnone lhqO lhqA lhqhnd rural married widow sepdiv single part unemp sick retd keephse wkshft1 housown hou suburb mothsmo fathsmo bothsmo smother male" describe $vars /*Define TIME VARIABLES*/ /***Define AGE at STARTING SMOKING***/ gen agestart = . replace agestart = agestrt*10 /***Define dummy for EVERSMOKER***/ drop if regfag==1 & agestart==0 /*drop individuals who reported to be current smokers but did not report age at starting*/ gen start=. replace start = 1 if agestart>0 replace start = 0 if start==. label variable start "eversmoker" /***Define duration variable for STARTING***/ gen starting=agestart if start==1 replace starting=age if start==0 label variable starting "number of years non-smoking" /***Define dummy for QUITTERS***/ gen quit=. replace quit=1 if exfag==1&exfagan<98 replace quit=0 if regfag==1 /***Generate SEEN DATE***/ gen seenmdy=mdy(seenmth, seenday, seenyr+1900) format seenmdy %d /***Generate DATE OF BIRTH***/ generate birthmdy = mdy(birthmth,birthday,birthyr+1800) if birthyr>=87 format birthmdy %d replace birthmdy =mdy(birthmth,birthday,birthyr+1900) if birthyr<87 format birthmdy %d list birthmdy in 1/10 /***Define YEAR of STARTING***/ summ agestart if start==1,d gen startmdy=birthmdy+(agestart*365.25) if start==1 replace startmdy=birthmdy+(17*365.25) if start==0 /*17 is the median age at starting*/ format startmdy %d gen strtyear=year(startmdy) gen year=strtyear-1904 /*Rescale year of starting: 1904 is the smallest year of starting*/ gen lnyear=ln(year) /***Generate duration variable for quitting: SMOKE_YEARS variable***/ gen sm_years=(seenmdy-startmdy)/365.25 if quit==0 /*current smokers*/ replace sm_years=(seenmdy-startmdy-(exfagan*365.25))/365.25 if quit==1 /*ever smoked at least 1 fag as long as 6 months*/ gen sm_years2=round(sm_years,1) drop sm_years rename sm_years2 sm_years /***Generate AGE AT CENSORING, at April 2005***/ generate ageATcens=( mdy(4,1,2005) - birthmdy) / 365.25 if death==0 /***Generate duration variable for LIFESPAN***/ gen lifespan = . replace lifespan = deathage if death==1 replace lifespan = ageATcens if death==0 label var lifespan "Survivor time: censoring at April 2005" /*CLEANING THE DATA:*/ count if sm_years<=0 drop if sm_years<=0 /*delete negative and null years of smoking*/ count if exfag==1&exfagan==. drop if exfag==1&exfagan==. /*delete ex smokers if didn't declare how long ago they quitted*/ count if exfag==1 & agestart==0 drop if exfag==1 & agestart==0 /*delete ex smokers if didn't declare age at starting*/ tab sm_years count summarize $vars start quit starting sm_years lifespan summ age if death==0 summ deathage if death==1,d /***DURATION MODEL for Starting***/ /*Non-parametric analysis*/ tab start su starting, de stset starting, failure(start) /*id(serno)*/ stsum stdes sts graph, title("Kaplan-Meier Survivor-starting") saving(KMsurv_start, replace) /*Kaplan-Meier Survivor estimate*/ sts graph, hazard title("Kaplan-Meier Hazard-starting") saving(KMhazstart, replace) sts graph, na title("Nelson-Aalen cumulative Hazard function-starting") saving(NAcumhazstart, replace) gr combine "KMsurv_start" "KMhazstart" "NAcumhazstart", saving(startingNP, replace) /***EXERCISE 1: GRAPH THE SURVIVOR, THE CUMULATIVE HAZARD AND THE HAZARD CONTRIBUTION FUNCTIONS USING sts. use help sts if needed***/ /***PARAMETRIC ANALYSIS***/ /*Generate age*/ gen lnage=ln(age) /*global of regressors*/ global xstart "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth rural suburb mothsmo fathsmo bothsmo smother male lnage" /*INFORMATION CRITERIA and COX SNELL RESIDUALS TEST */ stset, clear qui stset starting, failure(start) id(serno) qui streg $xstart, d(exp) time estat ic estimates store exp predict double cs, csnell stset, clear qui stset cs, failure (start) qui sts gen km=s qui gen double H=-ln(km) qui line H cs cs, sort title("Exponential") leg(off) ylabel(0 (0.5)3) ytitle("Cumulative Hazard", size(small) margin(medsmall))xtitle(Cox-Snell Residuals, size(small) margin(medsmall))saving("ExpCSstart",replace) stset, clear qui stset starting, failure(start) qui streg $xstart, d(w) time estat ic estimates store weibull predict double cs2, csnell stset, clear qui stset cs2, failure (start) qui sts gen km2=s qui gen double H2=-ln(km2) qui line H2 cs2 cs2, sort title("Weibull") leg(off) ylabel(0 (0.5)3)ytitle("Cumulative Hazard", size(small) margin(medsmall)) xtitle(Cox-Snell Residuals, size(small) margin(medsmall)) saving("WeibCSstart",replace) stset, clear qui stset starting, failure(start) qui streg $xstart, d(lognormal) estat ic estimates store logN predict double cs3, csnell stset, clear qui stset cs3, failure (start) qui sts gen km3=s qui gen double H3=-ln(km3) qui line H3 cs3 cs3, sort title("logNormal")leg(off) ylabel(0 (0.5)3) ytitle("Cumulative Hazard", size(small) margin(medsmall)) xtitle(Cox-Snell Residuals, size(small) margin(medsmall)) saving("LogNormalCSstart", replace) stset, clear qui stset starting, failure(start) qui streg $xstart, d(loglogistic) estat ic estimates store logL predict double cs4, csnell stset, clear qui stset cs4, failure (start) qui sts gen km4=s qui gen double H4=-ln(km4) qui line H4 cs4 cs4, sort title("logLogistic") ylabel(0 (0.5)3) legend( off)ytitle("Cumulative Hazard", size(small) margin(medsmall)) xtitle(Cox-Snell Residuals, size(small) margin(medsmall)) saving("LogLogisticCSstart", replace) drop cs km H cs2 km2 H2 cs3 km3 H3 cs4 km4 H4 gr combine "ExpCSstart" "WeibCSstart" "LogNormalCSstart" "LogLogisticCSstart", imargin(1 10 1 10) graphregion(margin(l=2 r=2)) saving("CoxSnell_start", replace) estimates stats exp weibull logN logL drop _est* /***LOG-LOGISTIC***/ stset, clear qui stset starting, failure(start) streg $xstart , dist(loglogistic) nolog predict median_start, median time predict mean_start, mean time summ mean_start median_start drop mean_start median_start stcurve, survival title("LogLog Survivor-starting") saving(logLsurvstart, replace) stcurve, hazard title("LogLog Hazard-starting") saving(logLhazstart, replace) stcurve, cumhaz title("LogLogCumulative Hazard-starting") saving(logLcumhazstart, replace) gr combine "logLsurvstart" "logLhazstart" "logLcumhazstart", saving(startingP, replace) /***EXERCISE 2a: compare predicted survival to observed survival: does the model fit well the observed data? Estimate a model based on a splitting mechanism and use Cox-Snell residuals test as well as Information Criteria to choose the best distribution. Compare predicted survival from the preferred model to observed survival***/ /***DURATION MODEL for QUITTING***/ /*Non-parametric analysis*/ tab quit su sm_years,de stset, clear stset sm_year, failure(quit) stsum stdes /***EXERCISE 2: GENERATE GRAPHS OF THE Survivor, HAZARD AND CUMULATIVE HAZARD FUNCTION***/ /***Parametric analysis***/ /*global of regressors*/ global xquit "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth widow sepdiv single part unemp sick retd keephse wkshft1 rural suburb housown hou mothsmo fathsmo bothsmo smother male lnage lnyear" /***EXERCISE 3: COMPARE DISTRIBUTIONS USIGN INFORMATION CRITERIA and COX SNELL RESIDUALS TEST***/ /***EXERCISE 4: ESTIMATE THE PREFERRED MODEL in PH METRIC AND RECOVER THE COEFFICIENTS OF THE AFT METRIC REPORTED IN TABLE IN THE CHAPTER and COMMENT***/ /***EXERCISE 5: GRAPH THE SURVIVOR, HAZARD AND CUMUL.HAZARD FUNCTIONS AND CALCULATE THE PREDICTED MEAN AND MEDIAN SURVIVAL TIME and COMMENT***/ /***DURATION MODEL for LIFESPAN***/ /*Non-parametric analysis*/ tab death su lifespan, de stset, clear stset lifespan, failure(death) enter(age) sum age stsum stdes /***EXERCISE 6: GENERATE GRAPHS OF THE Survivor, HAZARD AND CUMULATIVE HAZARD FUNCTION and COMMENT***/ /***Parametric analysis***/ /*global of regressors*/ replace quit=0 if quit==. global xls "sc12 sc45 lhqdg lhqhndA lhqnone lhqoth part unemp sick retd keephse wkshft1 rural suburb male lnage start quit" /***EXERCISE 7: COMPARE DISTRIBUTIONS USIGN INFORMATION CRITERIA and COX SNELL RESIDUALS TEST***/ /***EXERCISE 8: ESTIMATE THE COEFFICIENTS and THE HAZARD RATIOS FROM THE PREFERRED MODEL, CALCULATE PREDICTED MEDIAN LIFESPAN and COMMENT***/ log close