***************************************************************************************************************
*
* STATA CODE FOR ANALYSIS OF HEAVY-TAILED, NON-NORMAL DATA USING FLEXIBLE PARAMETRIC DISTRIBUTIONS
*
***************************************************************************************************************
* James Lomas
*
* DISCLAIMER: The following code is provided as is and should be used at your own risk.
* No user support is available.
*
* Last updated 30th September 2015
***************************************************************************************************************

*Throughout, the dependent variable is referenced using a global $y and the set of regressors using a global $xs.

*This code provides the tools to estimate the models used in three papers:
*Jones AM, Lomas J, Rice N. Healthcare cost regressions: going beyond the mean to estimate the full distribution. Health Economics 2015;24(9):1192-1212.
*Jones AM, Lomas J, Rice N. Applying beta-type size distributions to healthcare cost regressions. Journal of Applied Econometrics 2014;29(4):649-670.
*Jones AM, Lomas J, Moore P, Rice N. A quasi-Monte Carlo comparison of developments in parametric and semi-parametric regression methods for heavy tailed and non-normal data: with an application to healthcare costs. Health Econometrics and Data Group (HEDG), University of York 2013;working paper 13/30.
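*Minimal setup sketch before running any estimation block below (the variable
*names here are hypothetical placeholders - substitute your own data and
*regressors):
*
*	use mydata.dta, clear
*	global y totalcost
*	global xs "age female comorbidities"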
*(forthcoming in the Journal of the Royal Statistical Society: Series A)

/************************************************************/
/*Contents - press CTRL + G to advance to chosen line number*/
/************************************************************/

*Parametric models: estimation and prediction of means and tail probabilities
**Beta-type models, lines 38 - 314
**Duration models, lines 315 - 523
**Finite mixture models, lines 524 - 637

/*******************/
/*Parametric models*/
/*******************/

/******************/
/*Beta-type models*/
/******************/

/*GB2 (homoskedastic) with log-link*/

*Input log-likelihood
program gb2log
	args lnf lnb a p q
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(`a') + (`a'*abs(`p') - 1)*ln(`y') - (abs(`p') + abs(`q'))*ln(1+(`y'/exp(`lnb'))^`a') - `a'*abs(`p')*`lnb' - lngamma(abs(`p')) - lngamma(abs(`q')) + lngamma(abs(`p')+abs(`q'))
	}
end

*Estimate GB2 (homoskedastic) with log-link
capture noisily ml model lf gb2log (lnb: $y = $xs) /a /p /q, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double a_gb2log, eq(a)
predict double p_gb2log, eq(p)
predict double q_gb2log, eq(q)
gen double lnb_gb2log=[lnb]_b[_cons]
foreach var of varlist $xs {
	replace lnb_gb2log=lnb_gb2log+[lnb]_b[`var']*`var'
}
gen b_gb2log = exp(lnb_gb2log)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_gb2log=(b_gb2log*exp(lngamma(p_gb2log+(1/a_gb2log)))*(exp(lngamma(q_gb2log-(1/a_gb2log)))))/((exp(lngamma(p_gb2log)))*(exp(lngamma(q_gb2log))))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_gb2log = 1 - ibeta(p_gb2log,q_gb2log,((10000/b_gb2log)^a_gb2log)/(1+(10000/b_gb2log)^a_gb2log))

/*GB2 (homoskedastic) with sqrt-link*/

*Input log-likelihood
program gb2sqrt
	args lnf sqrtb a p q
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(`a') + (`a'*abs(`p') - 1)*ln(`y') - (abs(`p') + abs(`q'))*ln(1+(`y'/(`sqrtb')^2)^`a') - `a'*abs(`p')*ln((`sqrtb')^2) - lngamma(abs(`p')) - lngamma(abs(`q')) + lngamma(abs(`p')+abs(`q'))
	}
end

*Estimate GB2 (homoskedastic) with sqrt-link
capture noisily ml model lf gb2sqrt (sqrtb: $y = $xs) /a /p /q, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double a_gb2sqrt, eq(a)
predict double p_gb2sqrt, eq(p)
predict double q_gb2sqrt, eq(q)
gen double sqrtb_gb2sqrt=[sqrtb]_b[_cons]
foreach var of varlist $xs {
	replace sqrtb_gb2sqrt=sqrtb_gb2sqrt+[sqrtb]_b[`var']*`var'
}
gen b_gb2sqrt = (sqrtb_gb2sqrt)^2

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_gb2sqrt=(b_gb2sqrt*exp(lngamma(p_gb2sqrt+(1/a_gb2sqrt)))*(exp(lngamma(q_gb2sqrt-(1/a_gb2sqrt)))))/((exp(lngamma(p_gb2sqrt)))*(exp(lngamma(q_gb2sqrt))))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_gb2sqrt = 1 - ibeta(p_gb2sqrt,q_gb2sqrt,((10000/b_gb2sqrt)^a_gb2sqrt)/(1+(10000/b_gb2sqrt)^a_gb2sqrt))

/*Singh-Maddala*/

*Input log-likelihood
program smlog
	args lnf lnb a q
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(`a') + ln(`q') + (`a' - 1)*ln(`y') - (1 + `q')*ln(1+(`y'/exp(`lnb'))^`a') - `a'*`lnb'
	}
end

*Estimate Singh-Maddala
capture noisily ml model lf smlog (lnb: $y = $xs) /a /q, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double a_smlog, eq(a)
predict double q_smlog, eq(q)
gen double lnb_smlog=[lnb]_b[_cons]
foreach var of varlist $xs {
	replace lnb_smlog=lnb_smlog+[lnb]_b[`var']*`var'
}
gen b_smlog = exp(lnb_smlog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_smlog=(b_smlog*exp(lngamma(1+(1/a_smlog)))*exp(lngamma(q_smlog-(1/a_smlog)))/exp(lngamma(q_smlog)))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_smlog = (1+(10000/b_smlog)^a_smlog)^-q_smlog

/*Dagum*/

*Input log-likelihood
program dagumlog
	args lnf lnb a p
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(`a') + ln(`p') + (`a'*`p' - 1)*ln(`y') - (`p' + 1)*ln(1+(`y'/exp(`lnb'))^`a') - `a'*`p'*`lnb'
	}
end

*Estimate Dagum
capture noisily ml model lf dagumlog (lnb: $y = $xs) /a /p, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double a_dagumlog, eq(a)
predict double p_dagumlog, eq(p)
gen double lnb_dagumlog=[lnb]_b[_cons]
foreach var of varlist $xs {
	replace lnb_dagumlog=lnb_dagumlog+[lnb]_b[`var']*`var'
}
gen b_dagumlog = exp(lnb_dagumlog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_dagumlog=(b_dagumlog*exp(lngamma(p_dagumlog+(1/a_dagumlog)))*exp(lngamma(1-(1/a_dagumlog)))/exp(lngamma(p_dagumlog)))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_dagumlog = 1 - (1+(10000/b_dagumlog)^-a_dagumlog)^-p_dagumlog

/*B2*/

*Input log-likelihood
program b2log
	args lnf lnb p q
	local y "$ML_y1"
	quietly {
		replace `lnf' = (abs(`p') - 1)*ln(`y') - (abs(`p') + abs(`q'))*ln(1+`y'/exp(`lnb')) - abs(`p')*`lnb' - lngamma(abs(`p')) - lngamma(abs(`q')) + lngamma(abs(`p')+abs(`q'))
	}
end

*Estimate B2
capture noisily ml model lf b2log (lnb: $y = $xs) /p /q, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double p_b2log, eq(p)
predict double q_b2log, eq(q)
gen double lnb_b2log=[lnb]_b[_cons]
foreach var of varlist $xs {
	replace lnb_b2log=lnb_b2log+[lnb]_b[`var']*`var'
}
gen b_b2log = exp(lnb_b2log)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_b2log = (b_b2log*p_b2log/(q_b2log-1))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_b2log = 1 - ibeta(p_b2log,q_b2log,(10000/b_b2log)/(1+(10000/b_b2log)))

/*Lomax*/

*Input log-likelihood
program lomaxlog
	args lnf lnb q
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(`q') - (1 + `q')*ln(1+(`y'/exp(`lnb'))) - `lnb'
	}
end

*Estimate Lomax
capture noisily ml model lf lomaxlog (lnb: $y = $xs) /q, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double q_lomaxlog, eq(q)
gen double lnb_lomaxlog=[lnb]_b[_cons]
foreach var of varlist $xs {
	replace lnb_lomaxlog=lnb_lomaxlog+[lnb]_b[`var']*`var'
}
gen b_lomaxlog = exp(lnb_lomaxlog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_lomaxlog = (b_lomaxlog/(q_lomaxlog-1))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_lomaxlog = (1+(10000/b_lomaxlog))^-q_lomaxlog

/*Fisk*/

*Input log-likelihood
program fisklog
	args lnf lnb a
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(`a') + (`a' - 1)*ln(`y') - 2*ln(1+(`y'/exp(`lnb'))^`a') - `a'*`lnb'
	}
end

*Estimate Fisk
capture noisily ml model lf fisklog (lnb: $y = $xs) /a, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double a_fisklog, eq(a)
gen double lnb_fisklog=[lnb]_b[_cons]
foreach var of varlist $xs {
	replace lnb_fisklog=lnb_fisklog+[lnb]_b[`var']*`var'
}
gen b_fisklog = exp(lnb_fisklog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_fisklog = (b_fisklog*exp(lngamma(1+(1/a_fisklog)))*exp(lngamma(1-(1/a_fisklog))))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_fisklog = 1 - (1+(10000/b_fisklog)^-a_fisklog)^-1

/*****************/
/*Duration models*/
/*****************/

/*Generalised Gamma*/

*Input log-likelihood
program gglog
	args lnf mu sigma kappa
	local y "$ML_y1"
	quietly {
		replace `lnf' = ln(abs(`kappa')) - ln(`sigma') - ln(`y') - lngamma(`kappa'^-2) + ln(((`y'/exp(`mu'))^(`kappa'/`sigma'))/`kappa'^2)/`kappa'^2 - ((`y'/exp(`mu'))^(`kappa'/`sigma'))/`kappa'^2
	}
end

*Estimate Generalised Gamma
capture noisily ml model lf gglog (mu: $y = $xs) /sigma /kappa, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double sigma_gglog, eq(sigma)
predict double kappa_gglog, eq(kappa)
gen double lnb_gglog=[mu]_b[_cons]
foreach var of varlist $xs {
	replace lnb_gglog=lnb_gglog+[mu]_b[`var']*`var'
}

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_gglog = exp(lnb_gglog + (sigma_gglog/kappa_gglog)*ln(kappa_gglog^2) + lngamma(1/(kappa_gglog^2) + sigma_gglog/kappa_gglog) - lngamma(1/kappa_gglog^2))

*Prediction of Pr(Y>10000|X) - 'tail probability'
*(note: the "if" command evaluates kappa_gglog in the first observation only,
*which is fine here because the predicted kappa_gglog is constant across observations)
if kappa_gglog>0 {
	gen tp_10000_gglog = 1 - gammap(kappa_gglog^-2, (kappa_gglog^-2)*exp((kappa_gglog/sigma_gglog)*(ln(10000)-lnb_gglog)))
}
else {
	gen tp_10000_gglog = gammap(kappa_gglog^-2, (kappa_gglog^-2)*exp((kappa_gglog/sigma_gglog)*(ln(10000)-lnb_gglog)))
}

/*Gamma*/

*Input log-likelihood
program gammalog
	args lnf mu kappa
	local y "$ML_y1"
	quietly {
		replace `lnf' = - ln(`y') - lngamma(`kappa'^-2) + ln((`y'/exp(`mu'))/`kappa'^2)/`kappa'^2 - (`y'/exp(`mu'))/`kappa'^2
	}
end

*Estimate Gamma
capture noisily ml model lf gammalog (mu: $y = $xs) /kappa, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double kappa_gammalog, eq(kappa)
gen double lnb_gammalog=[mu]_b[_cons]
foreach var of varlist $xs {
	replace lnb_gammalog=lnb_gammalog+[mu]_b[`var']*`var'
}

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_gammalog = exp(lnb_gammalog + ln(kappa_gammalog^2) + lngamma(1/(kappa_gammalog^2) + 1) - lngamma(1/kappa_gammalog^2))

*Prediction of Pr(Y>10000|X) - 'tail probability'
if kappa_gammalog>0 {
	gen tp_10000_gammalog = 1 - gammap(kappa_gammalog^-2, (kappa_gammalog^-2)*exp(ln(10000)-lnb_gammalog))
}
else {
	gen tp_10000_gammalog = gammap(kappa_gammalog^-2, (kappa_gammalog^-2)*exp(ln(10000)-lnb_gammalog))
}

/*Lognormal*/

*Input log-likelihood
program lognormlog
	args lnf mu sigma
	local y "$ML_y1"
	quietly {
		replace `lnf' = -ln(`y') - ln(`sigma') - 0.5*ln(2*_pi) - ((ln(`y') - `mu')^2)/(2*(`sigma'^2))
	}
end

*Estimate Lognormal
capture noisily ml model lf lognormlog (mu: $y = $xs) /sigma, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double sigma_lognormlog, eq(sigma)
gen double lnb_lognormlog=[mu]_b[_cons]
foreach var of varlist $xs {
	replace lnb_lognormlog=lnb_lognormlog+[mu]_b[`var']*`var'
}
gen b_lognormlog = exp(lnb_lognormlog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_lognormlog = b_lognormlog*exp(0.5*(sigma_lognormlog^2))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_lognormlog = 1 - normal((ln(10000) - lnb_lognormlog)/sigma_lognormlog)

/*Weibull*/

*Input log-likelihood
program weilog
	args lnf mu sigma
	local y "$ML_y1"
	quietly {
		replace `lnf' = -ln(`y') - ln(`sigma') + ln(`y')/`sigma' - `mu'/`sigma' - (`y'/exp(`mu'))^(1/`sigma')
	}
end

*Estimate Weibull
capture noisily ml model lf weilog (mu: $y = $xs) /sigma, technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

predict double sigma_weilog, eq(sigma)
gen double lnb_weilog=[mu]_b[_cons]
foreach var of varlist $xs {
	replace lnb_weilog=lnb_weilog+[mu]_b[`var']*`var'
}
gen b_weilog = exp(lnb_weilog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_weilog = b_weilog*exp(lngamma(1+sigma_weilog))

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_weilog = exp(-(10000/b_weilog)^(1/sigma_weilog))

/*Exponential*/

*Input log-likelihood
program explog
	args lnf mu
	local y "$ML_y1"
	quietly {
		replace `lnf' = - `mu' - `y'/exp(`mu')
	}
end

*Estimate Exponential
capture noisily ml model lf explog (mu: $y = $xs), technique(nr 50 bfgs 50 dfp 50 bhhh 50)
capture noisily ml search
capture noisily ml max

gen double lnb_explog=[mu]_b[_cons]
foreach var of varlist $xs {
	replace lnb_explog=lnb_explog+[mu]_b[`var']*`var'
}
gen b_explog = exp(lnb_explog)

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_explog = b_explog

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_explog = exp(-10000/b_explog)

/***********************/
/*Finite mixture models*/
/***********************/

/*FMM 2-component Gamma with log-link*/

*Input log-likelihood
program fmmgammamixlog
	args lnf lnb1 alpha1 lnb2 alpha2 imlogitpi1
	tempvar f1 f2 pi
	local y "$ML_y1"
	quietly {
		gen double `pi' = exp(`imlogitpi1')/(1+exp(`imlogitpi1'))
		gen double `f1' = gammaden(abs(`alpha1'),exp(`lnb1'),0,`y')
		gen double `f2' = gammaden(abs(`alpha2'),exp(`lnb2'),0,`y')
		replace `lnf' = ln(`pi'*`f1'+(1-`pi')*`f2')
	}
end

*Estimate FMM 2-component Gamma with log-link
capture noisily glm $y $xs, f(gam) l(log) technique(nr 50 bfgs 50 dfp 50 bhhh 50)
mat lngamm = e(b)
capture noisily ml model lf fmmgammamixlog (lnb1: $y = $xs) /alpha1 (lnb2: $y = $xs) /alpha2 /imlogitpi1, technique(nr 50 bfgs 50 dfp 50 bhhh 50) init(lngamm 5 lngamm 5 0, copy)
capture noisily ml search
capture noisily ml max
mat drop lngamm

gen double lnb1_fmmgammamixlog=[lnb1]_b[_cons]
foreach var of varlist $xs {
	replace lnb1_fmmgammamixlog=lnb1_fmmgammamixlog+[lnb1]_b[`var']*`var'
}
gen b1_fmmgammamixlog = exp(lnb1_fmmgammamixlog)
gen double lnb2_fmmgammamixlog=[lnb2]_b[_cons]
foreach var of varlist $xs {
	replace lnb2_fmmgammamixlog=lnb2_fmmgammamixlog+[lnb2]_b[`var']*`var'
}
gen b2_fmmgammamixlog = exp(lnb2_fmmgammamixlog)
gen alpha1_fmmgammamixlog = [alpha1]_b[_cons]
gen alpha2_fmmgammamixlog = [alpha2]_b[_cons]
gen pi1_fmmgammamixlog = exp([imlogitpi1]_b[_cons])/(1 + exp([imlogitpi1]_b[_cons]))

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_fmmgammamixlog = pi1_fmmgammamixlog*alpha1_fmmgammamixlog*b1_fmmgammamixlog + (1 - pi1_fmmgammamixlog)*alpha2_fmmgammamixlog*b2_fmmgammamixlog

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_fmmgammamixlog = 1 - (pi1_fmmgammamixlog*gammap(alpha1_fmmgammamixlog, 10000/b1_fmmgammamixlog) + (1 - pi1_fmmgammamixlog)*gammap(alpha2_fmmgammamixlog, 10000/b2_fmmgammamixlog))

/*FMM 2-component Gamma with sqrt-link*/

*Input log-likelihood
program fmmgammamixsqrt
	args lnf sqrtb1 alpha1 sqrtb2 alpha2 imlogitpi1
	tempvar f1 f2 pi
	local y "$ML_y1"
	quietly {
		gen double `pi' = exp(`imlogitpi1')/(1+exp(`imlogitpi1'))
		gen double `f1' = gammaden(abs(`alpha1'),(`sqrtb1')^2,0,`y')
		gen double `f2' = gammaden(abs(`alpha2'),(`sqrtb2')^2,0,`y')
		replace `lnf' = ln(`pi'*`f1'+(1-`pi')*`f2')
	}
end

*Estimate FMM 2-component Gamma with sqrt-link
capture noisily glm $y $xs, f(gam) l(power 0.5) technique(nr 50 bfgs 50 dfp 50 bhhh 50)
mat sqgamm = e(b)
capture noisily ml model lf fmmgammamixsqrt (sqrtb1: $y = $xs) /alpha1 (sqrtb2: $y = $xs) /alpha2 /imlogitpi1, technique(nr 50 bfgs 50 dfp 50 bhhh 50) init(sqgamm 5 sqgamm 5 0, copy)
capture noisily ml search
capture noisily ml max
mat drop sqgamm

gen double sqrtb1_fmmgammamixsqrt=[sqrtb1]_b[_cons]
foreach var of varlist $xs {
	replace sqrtb1_fmmgammamixsqrt=sqrtb1_fmmgammamixsqrt+[sqrtb1]_b[`var']*`var'
}
gen b1_fmmgammamixsqrt = (sqrtb1_fmmgammamixsqrt)^2
gen double sqrtb2_fmmgammamixsqrt=[sqrtb2]_b[_cons]
foreach var of varlist $xs {
	replace sqrtb2_fmmgammamixsqrt=sqrtb2_fmmgammamixsqrt+[sqrtb2]_b[`var']*`var'
}
gen b2_fmmgammamixsqrt = (sqrtb2_fmmgammamixsqrt)^2
gen alpha1_fmmgammamixsqrt = [alpha1]_b[_cons]
gen alpha2_fmmgammamixsqrt = [alpha2]_b[_cons]
gen pi1_fmmgammamixsqrt = exp([imlogitpi1]_b[_cons])/(1 + exp([imlogitpi1]_b[_cons]))

*Prediction of E(Y|X) - 'conditional mean'
gen double cmean_fmmgammamixsqrt = pi1_fmmgammamixsqrt*alpha1_fmmgammamixsqrt*b1_fmmgammamixsqrt + (1 - pi1_fmmgammamixsqrt)*alpha2_fmmgammamixsqrt*b2_fmmgammamixsqrt

*Prediction of Pr(Y>10000|X) - 'tail probability'
gen tp_10000_fmmgammamixsqrt = 1 - (pi1_fmmgammamixsqrt*gammap(alpha1_fmmgammamixsqrt, 10000/b1_fmmgammamixsqrt) + (1 - pi1_fmmgammamixsqrt)*gammap(alpha2_fmmgammamixsqrt, 10000/b2_fmmgammamixsqrt))
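
*Optional sketch: since every model above is fitted via ml, fits can be compared
*by information criteria. Store each set of results immediately after its
*"ml max" call and then tabulate AIC/BIC across the stored models, e.g.:
*
*	estimates store gb2log
*	...
*	estimates stats _all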