/************** IVTAR.PRG for questions, contact Bruce E. Hansen Department of Economics Social Science Building University of Wisconsin Madison, WI 53706-1393 bhansen@ssc.wisc.edu http://www.ssc.wisc.edu/~bhansen/ This is a GAUSS procedure. It computes estimates and confidence intervals for threshold models with endogenous regressors. It computes the estimator described in "Instrumental Variable Estimation of a Threshold Model" written by Mehmet Caner and Bruce E. Hansen Be sure to have the Gauss graphics library active. If you run this program, it is currently set up to generate some simulated data and estimate the model on this data. For your own application, you can substitute your own data. *****************************************/ /* Example using Simulated Data */ n=100; kx=4; let sig[2,2]=1 .5 .5 1; e=rndn(n,2)*chol(sig); q=rndn(n,1); x=rndn(n,kx); z2=ones(n,1)~q; z1=x*ones(kx,1).*.3+e[.,2]; zz=(z1+z2*ones(2,1))*.1; y=zz.*(q.<0)-zz.*(q.>=0)+e[.,1]; qhat = gmm_thresh(y,z1,z2,x,q,.9,.8,1,1); /************************************/ /*************************************** GMM_THRESH Computes threshold model with endogenous variables Format qhat = gmm_thresh(y,z1,z2,x,q,_conf,_conf1,_conf2,_reduced); Inputs y nx1 dependent variable z1 nxk1 endogenous rhs variables z2 nxk2 exogenous rhs variables x nxm instrumental variables not included in z2, m>=k1 q nx1 threshold variable _conf 1x1 level for confidence interval for threshold, e.g. _conf=.9 _conf1 1x1 confidence level for threshold interval used as input to slope intervals, e.g. _conf2=.8 _conf2 1x1 interval dummy - to determine method for threshold interval used as input to slope intervals set to 0 for uncorrected (homoskedastic) interval set to 1 for heteroskedastic correction by quadratic set to 2 for heteroskedastic correction by nonparametric kernel _reduced 1x1 reduced form dummy set to 0 if reduced form is linear (no threshold) set to 1 if reduced form is a linear threshold model Output qhat 1x1 threshold estimate Most of the output is written to the screen. *******************************************************************/ proc gmm_thresh(y,z1,z2,x,q,_conf,_conf1,_conf2,_reduced); local xx,z1hat,rhohat,rhocf0,rhocf1,rhocf2,beta1,beta2,zhat,yhat,qhat, qcf0,qcf1,qcf2,z,da,db,ya,yb,xa,xb,za,zb,se1,se2,jstat1,jstat2,qq, beta1l,beta1u,beta2l,beta2u,qcf0i,qcf1i,qcf2i,qcf,i,qi,dai,dbi,beta1i,beta2i,se1i,se2i,jstat1i,jstat2i; xx=x~z2; if _reduced==0; z1hat=xx*regress(z1,xx); elseif _reduced==1; {z1hat,rhohat} = joint_thresh(z1,xx,q); endif; zhat=z1hat~z2; {yhat,qhat,qcf0,qcf1,qcf2} = joint_thresh_ci(y,zhat,q,_conf,1); z=z1~z2; da=(q.<=qhat); db=1-da; ya=selif(y,da); xa=selif(xx,da); za=selif(z,da); yb=selif(y,db); xb=selif(xx,db); zb=selif(z,db); {beta1,se1,jstat1} = gmm_linear(ya,za,xa); {beta2,se2,jstat2} = gmm_linear(yb,zb,xb); beta1l=beta1-se1*1.96; beta1u=beta1+se1*1.96; beta2l=beta2-se2*1.96; beta2u=beta2+se2*1.96; {yhat,qhat,qcf0i,qcf1i,qcf2i} = joint_thresh_ci(y,zhat,q,_conf1,0); if _conf2==0; qcf=qcf0i; elseif _conf2==1; qcf=qcf1i; elseif _conf2==2; qcf=qcf2i; endif; qq=unique(q,1); qq=selif(qq,(qq.<=qcf[2])); qq=selif(qq,(qq.>=qcf[1])); i=1; do while i<=rows(qq); qi=qq[i]; dai=(q.<=qi); dbi=1-dai; ya=selif(y,dai); xa=selif(xx,dai); za=selif(z,dai); yb=selif(y,dbi); xb=selif(xx,dbi); zb=selif(z,dbi); {beta1i,se1i,jstat1i} = gmm_linear(ya,za,xa); {beta2i,se2i,jstat2i} = gmm_linear(yb,zb,xb); beta1l=minc(((beta1i-se1i*1.96)~beta1l)'); beta1u=maxc(((beta1i+se1i*1.96)~beta1u)'); beta2l=minc(((beta2i-se2i*1.96)~beta2l)'); beta2u=maxc(((beta2i+se2i*1.96)~beta2u)'); i=i+1;endo; "";""; "Threshold Estimate " qhat; "Confidence Interval - Uncorrected " qcf0; "Confidence Interval - Het Corrected Quad " qcf1; "Confidence Interval - Het Corrected NP " qcf2; "";""; "Regime 1 : Threshold variable less than " qhat; "Number of observations " sumc(da);""; " Estimates S.E. Lower Upper"; beta1~se1~beta1l~beta1u; ""; "Regime 2 : Threshold variable greater than " qhat; "Number of observations " sumc(db);""; " Estimates S.E. Lower Upper"; beta2~se2~beta2l~beta2u; ""; retp(qhat); endp; /*************************************** JOINT_THRESH Estimates the threshold in a multivariate threshold model Format {yhat,qhat} = joint_thresh(y,x,q); Inputs y nxm dependent variable(s) x nxk independent variables (should include constant) q nx1 threshold variable Outputs yhat nxm Predicted values for y qhat 1x1 Threshold Estimate *******************************************************************/ proc (2) = joint_thresh(y,x,q); local n,k,e,s0,qs,qn,sn,n1,n2,r,d,xx,ex,qhat,x1,x2,beta1,beta2,yhat,smin; n = rows(y); k=cols(x); e=y-x*regress(y,x); s0=det(moment(e,0)); n1=round(.05*n)+k; n2=round(.95*n)-k; qs=sortc(q,1); qs=qs[n1:n2]; qs = unique(qs,1); qn = rows(qs); sn = zeros(qn,1); r=1; do while r<=qn; d=(q.<=qs[r]); xx=x.*d; xx=xx-x*regress(xx,x); ex=e-xx*regress(e,xx); sn[r]=det(moment(ex,0)); r=r+1;endo; r=minindc(sn); smin=sn[r]; qhat=qs[r]; d=(q.<=qhat); x1=x.*d; x2=x.*(1-d); beta1=regress(y,x1); beta2=regress(y,x2); yhat=x1*beta1+x2*beta2; retp(yhat,qhat); endp; /*************************************** JOINT_THRESH_CI Estimates the threshold in a multivariate threshold model and computes asymptotic confidence intervals Format {yhat,qhat,qcf_0,qcf_h1,qcf_h2} = joint_thresh(y,x,q,_conf,_graph); Inputs y nxm dependent variable(s) x nxk independent variables (should include constant) q nx1 threshold variable _conf 1x1 level for confidence interval, e.g. _conf=.9 _graph 1x1 Set _graph=1 to view graph of likelihood ratio Set _graph=0 to not display graph Outputs yhat nxm Predicted values for y qhat 1x1 Threshold Estimate qcf_0 1x2 Confidence interval for threshold, no heteroskedasticity correction qcf_h1 1x2 Confidence interval for threshold, heteroskedasticity correction using quadratic variance estimate (only if m=1) qcf_h2 1x2 Confidence interval for threshold, heteroskedasticity correction using variance estimated by Epanechnikov kernel with automatic bandwidth (only if m=1) *******************************************************************/ proc (5) = joint_thresh_ci(y,x,q,_conf,_graph); local n,k,e,s0,qs,qn,sn,n1,n2,r,d,xx,ex,qhat,x1,x2,beta1,beta2,yhat, smin,lr,r1,r2,qx,qh,m1,m2,g1,g2,sigq,hband,u,u2,f,df,eps,sige,kh,eta2,eta1, c1,lr0,lr1,lr2,clr,tit,qcf_0,qcf_h1,qcf_h2,sig2; n = rows(y); k=cols(x); e=y-x*regress(y,x); s0=det(moment(e,0)); n1=round(.05*n)+k; n2=round(.95*n)-k; qs=sortc(q,1); qs=qs[n1:n2]; qs = unique(qs,1); qn = rows(qs); sn = zeros(qn,1); r=1; do while r<=qn; d=(q.<=qs[r]); xx=x.*d; xx=xx-x*regress(xx,x); ex=e-xx*regress(e,xx); sn[r]=det(moment(ex,0)); r=r+1;endo; r=minindc(sn); smin=sn[r]; qhat=qs[r]; d=(q.<=qhat); x1=x.*d; x2=x.*(1-d); beta1=regress(y,x1); beta2=regress(y,x2); yhat=x1*beta1+x2*beta2; e=y-yhat; lr = n*(sn/smin-1); sig2=smin/n; if cols(y) .> 1; eta1=1; eta2=1; else; r1 = (x*(beta1-beta2)).^2; r2 = r1.*(e.^2); qx = q.^(0~1~2); qh = qhat.^(0~1~2); m1 = r1/qx; m2 = r2/qx; g1 = qh*m1; g2 = qh*m2; eta1=(g2/g1)/sig2; sigq = sqrt(meanc((q-meanc(q)).^2)); hband = 2.344*sigq/(n^(.2)); u = (qhat-q)/hband; u2 = u.^2; f = meanc((1-u2).*(u2.<=1))*(.75/hband); df = -meanc(-u.*(u2.<=1))*(1.5/(hband^2)); eps = r1 - qx*m1; sige = (eps'eps)/(n-3); hband = sige/(4*f*((m1[3]+(m1[2]+2*m1[3]*qhat)*df/f)^2)); u2 = ((qhat-q)/hband).^2; kh = ((1-u2)*.75/hband).*(u2.<=1); g1 = meanc(kh.*r1); g2 = meanc(kh.*r2); eta2 = (g2/g1)/sig2; endif; c1 = -2*ln(1-sqrt(_conf)); lr0 = (lr .>= c1); lr1 = (lr .>= (c1*eta1)); lr2 = (lr .>= (c1*eta2)); if maxc(lr0)==1; qcf_0=qs[minindc(lr0)]~qs[qn+1-minindc(rev(lr0))]; else; qcf_0=qs[1]~qs[qn]; endif; if maxc(lr1)==1; qcf_h1=qs[minindc(lr1)]~qs[qn+1-minindc(rev(lr1))]; else; qcf_h1=qs[1]~qs[qn]; endif; if maxc(lr2)==1; qcf_h2=qs[minindc(lr2)]~qs[qn+1-minindc(rev(lr2))]; else; qcf_h2=qs[1]~qs[qn]; endif; if _graph==1; graphset; _pdate = ""; fonts("complex simgrma"); title("Confidence Interval Construction for Threshold"); ylabel("Likelihood Ratio Sequence in \202g\201"); tit = "Threshold Variable"; xlabel(tit); _pxsci = 8; _pysci = 8; clr = ones(qn,1)*c1; _plegstr="" $+ "LR]n[(\202g\201)\000" $+ ftocv(_conf*100,2,0) $+ "% Critical"; if cols(y) .== 1; clr=clr~(clr*eta1)~(clr*eta2); _plegstr = _plegstr $+ "\000Hetero Corrected - 1\000Hetero Corrected - 2"; endif; _plegctl = 1; let _pltype = 6 4 3 1; _plwidth =2; xy(qs,lr~clr); endif; retp(yhat,qhat,qcf_0,qcf_h1,qcf_h2); endp; /*************************************** GMM_LINEAR Computes the GMM estimator of a linear model Format {beta,se,jstat} = gmm_linear(y,z,x); Inputs y nx1 dependent variable z nxk rhs variables x nxl instruments variables (should include constant and exogenous parts of z), l>=k Outputs beta kx1 Regression slope estimates se kx1 standard errors jstat 1x1 J Statistic *******************************************************************/ proc (3) = gmm_linear(y,z,x); local pihat,xz,xy,beta,e,g,v,se,m,jstat; pihat=regress(z,x); xz=x'z; xy=x'y; beta=(pihat'xy)/(pihat'xz); e=y-z*beta; g=invpd(moment(x.*e,0)); v=invpd(xz'g*xz); beta=v*(xz'g*xy); se=sqrt(diag(v)); m=x'(y-z*beta); jstat=m'g*m; retp(beta,se,jstat); endp; /*************************************** REGRESS Computes a linear regression. Uses generalized inverse if X'X is singular Format beta = regress(y,x); Inputs y nxm dependent variable(s) x nxk independent variables (should include constant) Output beta kxm Regression slope estimates *******************************************************************/ proc regress(y,x); local beta; trap 1; beta=y/x; if scalerr(beta); beta=pinv(moment(x,0))*(x'y); endif; trap 0; retp(beta); endp;