Code: First difference estimation in R: lfe vs. plm package

Create data frame

# packages
library(dplyr)
library(plm)
## Warning: package 'plm' was built under R version 4.0.3
library(lfe)
# ---- simulated balanced panel ----
set.seed(1000) # fixed seed so the random draws below are reproducible
N <- 100 # number of individuals (panel units)
Y <- 8 # number of years observed per individual
d <- data.frame(
  id = rep(seq_len(N), each = Y), # individual id, repeated once per year
  year = 2000 + rep(seq_len(Y), times = N) # years 2001, ..., 2000 + Y
)

# covariates: standard-normal draws, one per row
# (the draw order x1, x2, e is kept so the seeded values stay the same)
d$x1 <- rnorm(n = nrow(d), mean = 0, sd = 1)
d$x2 <- rnorm(n = nrow(d), mean = 0, sd = 1)
# error term: normal noise with standard deviation 12
d$e <- rnorm(n = nrow(d), mean = 0, sd = 12)
# true coefficients and the outcome built from them
coef.x1 <- 1
coef.x2 <- 2
d$y <- with(d, coef.x1 * x1 + coef.x2 * x2 + e)
# first differences, computed separately for every individual
a <- lapply(
  split(d, d$id),
  function(panel) {
    # Each new column d.<name> is the value minus the value in the preceding
    # row of the same individual; dplyr::lag() shifts by position, so the
    # first row of every id gets NA.
    # NOTE(review): relies on rows being in year order within each id.
    vars <- c("y", "x1", "x2")
    panel[paste0("d.", vars)] <- lapply(
      panel[vars],
      function(col) col - dplyr::lag(col)
    )
    panel
  }
)
# stack the per-individual pieces back into a single data frame
d <- do.call("rbind", a)

FD estimations

stats package

# OLS on the hand-made differences: no intercept, one dummy per year level
f <- d.y ~ 0 + d.x1 + d.x2 + factor(year)
e <- lm(formula = f, data = d)
summary(e)
##
## Call:
## lm(formula = f, data = d)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -47.293 -11.743  -1.155  11.854  59.287
##
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## d.x1              1.19072    0.49461   2.407   0.0163 *
## d.x2              1.91635    0.48240   3.973 7.86e-05 ***
## factor(year)2002  1.25844    1.76359   0.714   0.4757
## factor(year)2003 -0.07679    1.76679  -0.043   0.9653
## factor(year)2004 -0.86314    1.76454  -0.489   0.6249
## factor(year)2005  1.42952    1.76400   0.810   0.4180
## factor(year)2006 -1.63823    1.76283  -0.929   0.3530
## factor(year)2007 -0.49567    1.76290  -0.281   0.7787
## factor(year)2008  0.82060    1.76310   0.465   0.6418
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.63 on 691 degrees of freedom
##   (100 observations deleted due to missingness)
## Multiple R-squared:  0.03309,    Adjusted R-squared:  0.0205
## F-statistic: 2.628 on 9 and 691 DF,  p-value: 0.005416

lfe package

# Same regression on the hand-made differences via lfe::felm(). In the
# multi-part formula, `year` (second part) is the factor to be projected out;
# the third and fourth parts are left as 0.
f <- d.y ~ d.x1 + d.x2 | year | 0 | 0
e <- felm(formula = f, data = d)
summary(e)
##
## Call:
##    felm(formula = f, data = d)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -47.293 -11.743  -1.155  11.854  59.287
##
## Coefficients:
##      Estimate Std. Error t value Pr(>|t|)
## d.x1   1.1907     0.4946   2.407   0.0163 *
## d.x2   1.9163     0.4824   3.973 7.86e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.63 on 691 degrees of freedom
##   (100 observations deleted due to missingness)
## Multiple R-squared(full model): 0.03307   Adjusted R-squared: 0.02188
## Multiple R-squared(proj model): 0.02895   Adjusted R-squared: 0.01771
## F-statistic(full model):2.955 on 8 and 691 DF, p-value: 0.002952
## F-statistic(proj model):  10.3 on 2 and 691 DF, p-value: 3.908e-05

plm package

# First-difference model from plm, specified on the level variables with the
# panel structure given through index = c("id", "year").
# NOTE(review): this formula has no year terms, whereas the lm and felm
# specifications in this file include year (as dummies / projected-out
# factor); that may be why the estimates here are not identical to theirs.
f <- y ~ 0 + x1 + x2
e <- plm(
  formula = f,
  data = d,
  effect = "individual",
  model = "fd",
  index = c("id", "year")
)
summary(e)
## Oneway (individual) effect First-Difference Model
##
## Call:
## plm(formula = f, data = d, effect = "individual", model = "fd",
##     index = c("id", "year"))
##
## Balanced Panel: n = 100, T = 8, N = 800
## Observations used in estimation: 700
##
## Residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
## -46.489 -12.001  -1.001   0.062  12.074  58.775
##
## Coefficients:
##    Estimate Std. Error t-value  Pr(>|t|)
## x1  1.19755    0.49240  2.4320   0.01526 *
## x2  1.93567    0.47893  4.0417 5.897e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares:    222020
## Residual Sum of Squares: 215470
## R-Squared:      0.029494
## Adj. R-Squared: 0.028104
## F-statistic: 10.6081 on 2 and 698 DF, p-value: 2.8947e-05