Supporting Statistical Analysis for Research
5.5 Date and time variables
These exercises use the MplsStops.csv data set.
Import the MplsStops.csv file.

MplsStops_path <- file.path("..", "datasets", "MplsStops.csv")
MplsStops_in <- read_csv(
  MplsStops_path,
  guess_max = 100000,
  col_types = cols()
)

Warning: Missing column names filled in: 'X1' [1]

MplsStops_in <- MplsStops_in %>%
  rename(
    id_num = idNum,
    citation_issued = citationIssued,
    person_search = personSearch,
    vehicle_search = vehicleSearch,
    pre_race = preRace,
    police_precinct = policePrecinct
  )

MplsStops <- MplsStops_in %>%
  select(-X1)

glimpse(MplsStops)

Observations: 51,920
Variables: 14
$ id_num          <chr>  "17-000003", "17-000007", "17-000073", "17-000...
$ date            <dttm> 2017-01-01 00:00:42, 2017-01-01 00:03:07, 201...
$ problem         <chr>  "suspicious", "suspicious", "traffic", "suspic...
$ MDC             <chr>  "MDC", "MDC", "MDC", "MDC", "MDC", "MDC", "MDC...
$ citation_issued <chr>  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ person_search   <chr>  "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO"...
$ vehicle_search  <chr>  "NO", "NO", "NO", "NO", "NO", "NO", "NO", "NO"...
$ pre_race        <chr>  "Unknown", "Unknown", "Unknown", "Unknown", "U...
$ race            <chr>  "Unknown", "Unknown", "White", "East African",...
$ gender          <chr>  "Unknown", "Male", "Female", "Male", "Female",...
$ lat             <dbl>  44.96662, 44.98045, 44.94835, 44.94836, 44.979...
$ long            <dbl>  -93.24646, -93.27134, -93.27538, -93.28135, -9...
$ police_precinct <dbl>  1, 1, 5, 5, 1, 1, 1, 2, 2, 4, 5, 1, 2, 1, 1, 1...
$ neighborhood    <chr>  "Cedar Riverside", "Downtown West", "Whittier"...

Create a day of the week variable.
MplsStops <- MplsStops %>%
  mutate(
    day = wday(date, label=TRUE)
  )

MplsStops %>%
  select(id_num, date, day) %>%
  head()

# A tibble: 6 x 3
  id_num    date                day
  <chr>     <dttm>              <ord>
1 17-000003 2017-01-01 00:00:42 Sun
2 17-000007 2017-01-01 00:03:07 Sun
3 17-000073 2017-01-01 00:23:15 Sun
4 17-000092 2017-01-01 00:33:48 Sun
5 17-000098 2017-01-01 00:37:58 Sun
6 17-000111 2017-01-01 00:46:48 Sun

Create a variable that measures the amount of time that has passed between the prior stop and the current stop.
MplsStops <- MplsStops %>%
  arrange(date) %>%
  mutate(
    prior_date = lag(date),
    time_from_prior = difftime(date, prior_date, units = "mins")
  )

MplsStops %>%
  select(date, prior_date, time_from_prior) %>%
  head()

# A tibble: 6 x 3
  date                prior_date          time_from_prior
  <dttm>              <dttm>              <drtn>
1 2017-01-01 00:00:42 NA                         NA mins
2 2017-01-01 00:03:07 2017-01-01 00:00:42  2.416667 mins
3 2017-01-01 00:23:15 2017-01-01 00:03:07 20.133333 mins
4 2017-01-01 00:33:48 2017-01-01 00:23:15 10.550000 mins
5 2017-01-01 00:37:58 2017-01-01 00:33:48  4.166667 mins
6 2017-01-01 00:46:48 2017-01-01 00:37:58  8.833333 mins

On September 8th, 2017, Minneapolis swore in a new police chief (story). Create an indicator variable that identifies the observations in the data frame that occurred on September 9th or later.
MplsStops <- MplsStops %>%
  mutate(
    new_chief = date >= ymd_hms("2017-09-09 00:00:00")
  )

MplsStops %>%
  select(id_num, date, day, new_chief) %>%
  slice(37584:37590) %>%
  head()

# A tibble: 6 x 4
  id_num    date                day   new_chief
  <chr>     <dttm>              <ord> <lgl>
1 17-344351 2017-09-08 23:57:21 Fri   FALSE
2 17-344353 2017-09-08 23:58:43 Fri   FALSE
3 17-344368 2017-09-09 00:09:59 Sat   TRUE
4 17-344369 2017-09-09 00:10:34 Sat   TRUE
5 17-344377 2017-09-09 00:15:30 Sat   TRUE
6 17-344382 2017-09-09 00:19:43 Sat   TRUE