#########################################################################
##  URREG.R
##  
##  Procedures to implement unit root testing with regression covariates.
##  
##  Bruce E. Hansen
##  Department of Economics
##  Social Science Building
##  University of Wisconsin
##  Madison, WI 53706-1393
##  behansen@wisc.edu
##  http://www.ssc.wisc.edu/~bhansen/
##  
##  This file contains four R procedures:
##  UR_REG, LR_VAR, UR_CRITS, UR_ADF.
##  
##  
##  (1) UR_REG produces the coefficient and studentized statistics
##  for unit roots outlined in B. Hansen "Rethinking the univariate approach
##  to unit root testing: Using covariates to increase power" 
##  Econometric Theory (1995).
##  
##  The format is:
##  
##   out <- UR_REG(y,x,p,q,k1,k2)
##   out$tstat 
##  
##  The inputs are:
##  
##  y   -  dependent variable, Tx1.
##  x   -  right-hand-side variables, Txk.
##  p   -  order of deterministic time trend.
##         if p=-1, then none included
##         if p=0,  then constant included
##         if p=1,  then constant and time trend included.
##  q   -  number of lagged dy terms
##  k1  -  number of lagged x terms
##  k2  -  number of lead x terms
##  
##  The outputs are:
##  
##  tstat   -  studentized coefficient test
##  crits   -  asymptotic critical values (1%, 5%, 10%) for tstat
##  rho2    -  estimated "Rho-Squared"
##  sig2    -  estimated error variance
##  
##  Notes:
##  
##  If k1=k2=0, then the variable x appears on the right-hand-side of the
##  regression.  This is useful if the user wishes to directly control the
##  context of the included covariates.
##  
##  The variable "x" needs to be stationary.  In most cases, it will be the
##  first-difference of some levels variable.  (In the notation of the paper,
##  it should be "dx").
##  
##  
##  (2) LR_VAR  calculates a "Long-Run Variance" for a vector-valued
##  series, using the suggestions of Andrews (1991).  The format is:
##  
##  out <- LR_VAR(e)
##  out$omega
##
##  where e is a Txk matrix of observations, omega is a kxk covariance
##  matrix, and bandw is the bandwidth used in estimation of omega.
##  
##  
##  Both UR_REG and LR_VAR depend on the following set of global variables.
##  
##  kernel    -  indicator for kernel method
##               if kernel = 1, then Parzen kernel is used
##               if kernel = 2, then Quadratic Spectral kernel is used
##               if kernel = 3, then Bartlett (Newey-West) kernel is used
##  band      -  bandwidth used to calculate long-run variance matrix
##               if band = 0, then the bandwidth is calculated using
##               the suggestion of Andrews (1991)
##  white     -  pre-whiten error indicator
##               if white = 1, then the errors are pre-whitened with 
##               a VAR(1) before kernel is applied.
##  urprint   -  print to screen supressor
##               if urprint = 0, then results (for UR_REG and UR_ADF) are 
##               printed to screen
##               if urprint = 1, then no printing to screen
##              (useful for Monte Carlo runs)
##  
##  Global Defaults are set below
##  Since they are globals, they may be changed in a program if used.
##  
##  
##  (3)  UR_CRITS gives asymptotic critical values for the unit root tests.
##  The format is
##  
##  crits <- ur_crits(rho2,p)
##  
##  The inputs are:
##  
##  rho2  -  R-squared measure, as given in UR_REG output
##  p     -  order of deterministic time trend, as given in UR_REG input
##           if p=-1, then none included
##           if p= 0, then constant included
##           if p= 1, then constant and time trend included.
##  
##  The outputs are:
##  
##  crits - asymptotic 1%, 5%, 10% critical values for tstat
##  
##  
##  (4)  UR_ADF
##  
##  Format:
##  
##  out <- UR_ADF(y,p,q)
##  out$tstat
##
##  The inputs are:
##  
##  y   -  dependent variable, Tx1.
##  p   -  order of deterministic time trend.
##         if p=-1, then none included
##         if p= 0, then constant included
##         if p= 1, then constant and time trend included.
##  q   -  number of lagged dy terms
##  
##  The outputs are:
##  
##  tstat  -  studentized coefficient test
##  sig2   -  estimated error variance
##  
##  Print-to-screen is determined by the global variable urprint,
##  documented above.
##  
#########################################################################

# GLOBAL VARIABLES: #

kernel  <- 1
band    <- 0
urprint <- 1
white   <- 0


# PROCEDURE CODE: #

lr_var <- function(u){
tu <- nrow(u)
p <- ncol(u)
if (white == 1){
  te <- tu-1
  au <- qr.solve(as.matrix(u[1:te,]),as.matrix(u[2:tu,]))
  e <- as.matrix(u[2:tu,]) - as.matrix(u[1:te,])%*%au
}else{
  e <- u
  te <- tu
}
if (band == 0){
 eb <- as.matrix(e[1:(te-1),])
 ef <- as.matrix(e[2:te,])
 ae <- as.matrix(colSums(eb*ef)/colSums(eb^2))
 ee <- ef - eb*(matrix(1,nrow(eb),1)%*%t(ae))
 se <- as.matrix(colMeans(ee^2))
 ad <- sum((se/((1-ae)^2))^2)
 a1 <- 4*sum((ae*se/(((1-ae)^3)*(1+ae)))^2)/ad
 a2 <- 4*sum((ae*se/((1-ae)^4))^2)/ad
 if (kernel == 2){                   # Quadratic Spectral #
     bandw <- 1.3221*((a2*te)^.2)
 }   
 if (kernel == 1){                   # Parzen #
     bandw <- 2.6614*((a2*te)^.2)               
     if (bandw > (te-2)) bandw <- te-2
 }
 if (kernel == 3){                   # Bartlett #   
     bandw <- 1.1447*((a1*te)^.333)
     if (bandw > (te-2)) bandw <- te-2
 }
}else{
 bandw <- band
}

# Estimate Covariances #
if (kernel == 2){                             # Quadratic Spectral Kernel #
  tm <- te-1
  jb <- as.matrix(seq(1,tm,1)/bandw)
  jband <- jb*1.2*pi
  kern <- ((sin(jband)/jband - cos(jband))/(jband^2))*3
}
if (kernel == 1){                             # Parzen kernel #
  tm <- floor(bandw)
  if (tm > 0){
   jb <- as.matrix(seq(1,tm,1)/bandw)
   kern <- (1 - (jb^2)*6 + (jb^3)*6)*(jb <= .5)
   kern <- kern + ((1-jb)^3)*(jb > .5)*2
  }
}
if (kernel == 3){                             # Bartlett kernel #
  tm <- floor(bandw)
  if (tm > 0) kern <- as.matrix(1 - seq(1,tm,1)/bandw)
}

lam <- matrix(0,p,p)
for (j in 1:tm){
  kj <- kern[j]
  lam <- lam + (t(as.matrix(e[1:(te-j),]))%*%as.matrix(e[(1+j):te,]))*kj
}
omega <- (t(e)%*%e + lam + t(lam))/te

if (white == 1){
  eau <- solve(diag(p) - au)
  omega <- t(eau)%*%omega%*%eau
}
list(omega=omega,bandw=bandw)
}

#*******************************************************************#

ur_crits <- function(r2,p){

if (p == -1) crt <- matrix( c(
-2.4611512,       -1.7832090,       -1.4189957,
-2.4943410,       -1.8184897,       -1.4589747,
-2.5152783,       -1.8516957,       -1.5071775,
-2.5509773,       -1.8957720,       -1.5323511,
-2.5520784,       -1.8949965,       -1.5418830,
-2.5490848,       -1.8981677,       -1.5625462,
-2.5547456,       -1.9343180,       -1.5889045,
-2.5761273,       -1.9387996,       -1.6020210,
-2.5511921,       -1.9328373,       -1.6128210,
-2.5658,          -1.9393,          -1.6156),
10,3,byrow=TRUE)

if (p==0) crt <- matrix( c(
-2.7844267,       -2.1158290,       -1.7525193,
-2.9138762,       -2.2790427,       -1.9172046,
-3.0628184,       -2.3994711,       -2.0573070,
-3.1376157,       -2.5070473,       -2.1680520,
-3.1914660,       -2.5841611,       -2.2520173,
-3.2437157,       -2.6399560,       -2.3163270,
-3.2951006,       -2.7180169,       -2.4085640,
-3.3627161,       -2.7536756,       -2.4577709,
-3.3896556,       -2.8074982,       -2.5037759,
-3.4336,          -2.8621,          -2.5671),
10,3,byrow=TRUE)

if (p==1) crt <- matrix( c(
-2.9657928,       -2.3081543,       -1.9519926,
-3.1929596,       -2.5482619,       -2.1991651,
-3.3727717,       -2.7283918,       -2.3806008,
-3.4904849,       -2.8669056,       -2.5315918,
-3.6003166,       -2.9853079,       -2.6672416,
-3.6819803,       -3.0954760,       -2.7815263,
-3.7551759,       -3.1783550,       -2.8728146,
-3.8348596,       -3.2674954,       -2.9735550,
-3.8800989,       -3.3316415,       -3.0364171,
-3.9638,          -3.4126,          -3.1279),
10,3,byrow=TRUE)

if (r2<.1){
  ct <- crt[1,]
}else{
  r210 <- r2*10
  if (r210 >= 10){
    ct <- crt[10,]
  }else{
    r2a <- floor(r210)
    r2b <- ceiling(r210)
    wa  <- r2b - r210
    ct  <- wa*crt[r2a,] + (1-wa)*crt[r2b,]
  }
}
ct
}

#*******************************************************************#

ur_reg <- function(y,x,p,q,k1,k2){

t <- nrow(y)
m <- ncol(x)
ts <- 1 + max(rbind(q,(k1-1)))
tt <- t - ts - k2
dy <- as.matrix(y[2:t]-y[1:(t-1)])
dv <- as.matrix(dy[ts:(nrow(dy)-k2)])
z <- as.matrix(y[ts:(t-1-k2)])
if (p >= 0){
  z <- cbind(z,matrix(1,tt,1)) 
  if (p == 1) z <- cbind(z,as.matrix(seq(1,tt,1)))
}

for (qq in 1:q) z <- cbind(z,as.matrix(dy[(ts-qq):(nrow(dy)-qq-k2)]))  
z <- cbind(z,as.matrix(x[(ts+1):(t-k2),]))          
for (kk in 1:k1) z <- cbind(z,as.matrix(x[(ts-kk+1):(t-kk-k2),]))  
for (kk in 1:k2) z <- cbind(z,as.matrix(x[(ts+kk+1):(t+kk-k2),])) 

zzi <- solve(t(z)%*%z)  
beta <- zzi%*%(t(z)%*%dv)
e <- dv - z%*%beta
sig2 <- as.vector((t(e)%*%e)/(tt - 2 - (1+k1+k2)*m - q - p))
cov <- zzi*sig2
se <- as.matrix(sqrt(diag(cov)))
tstat <- beta[1]/se[1]

v <- e + as.matrix(z[,(3+p+q):ncol(z)])%*%as.matrix(beta[(3+p+q):ncol(z)])
out <- lr_var(cbind(v,e))
omega <- out$omega
bandw <- out$bandw
r2 <- omega[2,2]/omega[1,1]
rho2 <- (omega[2,1]^2)/(omega[1,1]*omega[2,2])

crits <- ur_crits(rho2,p)

if (urprint == 0){
bs <- format(cbind(beta,se),digits=4)
cat ("Regression Results","\n")
cat ("\n")
cat ("Time Trend Order   ", p,"\n")
cat ("Number AR Lags     ", q,"\n")
cat ("Number of Lag DXs  ", k1,"\n")
cat ("Number of Lead DXs ", k2,"\n")
if (kernel == 1) cat ("Kernel = Parzen","\n")
if (kernel == 2) cat ("Kernel = Quadratic Spectral","\n")
if (kernel == 3) cat ("Kernel = Bartlett","\n")
cat ("\n")
cat ("Y(t-1):","\n")
cat (bs[1,],"\n")
cat ("\n")
if (p >= 0){
cat ("Constant, Trend:","\n")
for (j in 2:(p+2)) cat (bs[j,],"\n")
cat ("\n")
}
cat ("DY(t-1) ... DY(t-q):","\n")
for (j in (p+3):(p+2+q)) cat (bs[j,],"\n")
cat ("\n")
cat ("X(t)","\n")
for (j in (p+3+q):(p+q+2+m)) cat (bs[j,],"\n")
cat ("\n")
if (k1 > 0){
cat ("X(t-1) ... X(t-k1):","\n")
for (j in (3+p+q+m):(2+p+q+m+k1*m)) cat (bs[j,],"\n")
cat ("\n")
}
if (k2 > 0){
cat ("X(t+1) ... X(t+k2):","\n")
for (j in (3+p+q+m+k1*m):nrow(bs)) cat (bs[j,],"\n")
cat ("\n")
}
cat ("Error Variance ", sig2,"\n")
cat ("\n")
cat ("Studentized Unit Root Test Statistic   ", tstat,"\n")
cat ("1%, 5%, 10% Asymptotic Critical Values ", crits,"\n")
cat ("\n")
cat ("Squared Correlation Measure (Rho^2)    ", rho2,"\n")
cat ("Variance Ratio (R^2)                   ", r2,"\n")
cat ("Bandwidth                              ", bandw,"\n")
cat ("\n")
}

list(tstat=tstat,crits=crits,rho2=rho2,sig2=sig2)
}

#*******************************************************************#

ur_adf <-function(y,p,q){

t <- nrow(y)
tt <- t - q - 1;
dy <- as.matrix(y[2:t]-y[1:(t-1)])
dv <- as.matrix(dy[(q+1):nrow(dy)])
z <- as.matrix(y[(q+1):(t-1)])
if (p >= 0){
  z <- cbind(z,matrix(1,tt,1)) 
  if (p == 1) z <- cbind(z,as.matrix(seq(1,tt,1)))
}
for (qq in 1:q) z <- cbind(z,as.matrix(dy[(q-qq+1):(nrow(dy)-qq)]))
zzi <- solve(t(z)%*%z)  
beta <- zzi%*%(t(z)%*%dv)
e <- dv - z%*%beta
sig2 <- as.vector((t(e)%*%e)/(tt - 2 - q - p))
cov <- zzi*sig2
se <- as.matrix(sqrt(diag(cov)))
tstat <- beta[1]/se[1]

if (urprint == 0){
bs <- format(cbind(beta,se),digits=4)
cat ("Regression Results","\n")
cat ("\n")
cat ("Y(t-1):","\n")
cat (bs[1,],"\n")
cat ("\n")
if (p >= 0){
cat ("Constant, Trend:","\n")
for (j in 2:(p+2)) cat (bs[j,],"\n")
cat ("\n")
}
if (q > 0){
cat ("DY(t-1) ... DY(t-q):","\n")
for (j in (p+3):(p+2+q)) cat (bs[j,],"\n")
cat ("\n")
}
cat ("Error Variance ", sig2,"\n")
cat ("\n")
cat ("t Statistic ", tstat,"\n")
}
list(tstat=tstat,sig2=sig2)
}