# Difference-in-differences (DiD) example on the Princeton Panel101 data ----
#
# NOTE: the former `rm(list = ls())` was removed on purpose. Clearing the
# global environment from inside a script destroys the caller's objects;
# restart the R session instead if a clean workspace is needed.

library(tidyverse)
library(data.table)
library(ggplot2)
library(readxl)
library(RColorBrewer)
library(wooldridge)
library(foreign)

# Read the Stata panel data set straight from the course web site.
df <- read.dta("http://dss.princeton.edu/training/Panel101.dta")

# Build the three regressors needed for the DiD regression:
# 1) `after`: dummy for the period in which the treatment is in place.
#    Treatment is assumed to start in 1994, so years before 1994 get 0 and
#    1994 onwards get 1.
# 2) `treated`: dummy for the group exposed to the treatment. Countries
#    E, F and G (referred to as codes 5, 6 and 7) are assumed treated (= 1);
#    the remaining countries (codes 1-4) are not treated (= 0).
#    `%in%` never returns NA, so a missing country would be coded as 0.
# 3) `treated_after`: interaction between time and treatment. Its
#    coefficient is the DiD estimate.
df <- df %>%
  mutate(
    after = as.integer(year >= 1994),
    treated = as.integer(country %in% c("E", "F", "G")),
    treated_after = after * treated
  )

# Regress the outcome on both dummies and their interaction.
DiD_mod <- lm(y ~ treated + after + treated_after, data = df)
summary(DiD_mod)

# The coefficient for treated_after is the difference-in-differences
# estimator.
# The effect is significant at 10% with the treatment having a negative effect.