Date-based cleaning

date_standardize(x, format = "%Y-%m-%d", date_column = "date", ...)

date_missing(x, date_column = "date", drop = TRUE, ...)

date_create(x, ...)

date_create_(x, ..., .dots, format = "%Y-%m-%d", date_column = "date")

Arguments

x

(data.frame) A data.frame

format

(character) Date format. See as.Date()

date_column

(character) Name of the date column

...

Comma-separated list of unquoted variable names

drop

(logical) Drop bad data points or not. Either way, we parse out bad data points as an attribute you can access. Default: TRUE

.dots

Used to work around non-standard evaluation

Value

Returns a data.frame, with attributes

Details

  • date_standardize - Converts dates to a specific format

  • date_missing - Drops records that do not have dates, i.e. records whose date is either NA or a zero-length character string

  • date_create - Create a date field from other fields (e.g., separate year, month, and day columns)

Examples

df <- sample_data_1
# Standardize dates
dframe(df) %>% date_standardize()
#> # A tibble: 1,500 x 5 #> name longitude latitude date key #> <chr> <dbl> <dbl> <chr> <int> #> 1 Ursus americanus -79.7 38.4 2015-01-14 1065590124 #> 2 Ursus americanus -82.4 35.7 2015-01-13 1065588899 #> 3 Ursus americanus -99.1 23.7 2015-02-20 1098894889 #> 4 Ursus americanus -72.8 43.9 2015-02-13 1065611122 #> 5 Ursus americanus -72.3 43.9 2015-03-01 1088908315 #> 6 Ursus americanus -109. 32.7 2015-03-29 1088932238 #> 7 Ursus americanus -109. 32.7 2015-03-29 1088932273 #> 8 Ursus americanus -124. 40.1 2015-03-28 1132403409 #> 9 Ursus americanus -78.3 36.9 2015-03-20 1088923534 #> 10 Ursus americanus -76.8 35.5 2015-04-05 1088954559 #> # … with 1,490 more rows
dframe(df) %>% date_standardize("%Y/%m/%d")
#> # A tibble: 1,500 x 5 #> name longitude latitude date key #> <chr> <dbl> <dbl> <chr> <int> #> 1 Ursus americanus -79.7 38.4 2015/01/14 1065590124 #> 2 Ursus americanus -82.4 35.7 2015/01/13 1065588899 #> 3 Ursus americanus -99.1 23.7 2015/02/20 1098894889 #> 4 Ursus americanus -72.8 43.9 2015/02/13 1065611122 #> 5 Ursus americanus -72.3 43.9 2015/03/01 1088908315 #> 6 Ursus americanus -109. 32.7 2015/03/29 1088932238 #> 7 Ursus americanus -109. 32.7 2015/03/29 1088932273 #> 8 Ursus americanus -124. 40.1 2015/03/28 1132403409 #> 9 Ursus americanus -78.3 36.9 2015/03/20 1088923534 #> 10 Ursus americanus -76.8 35.5 2015/04/05 1088954559 #> # … with 1,490 more rows
dframe(df) %>% date_standardize("%d%b%Y")
#> # A tibble: 1,500 x 5 #> name longitude latitude date key #> <chr> <dbl> <dbl> <chr> <int> #> 1 Ursus americanus -79.7 38.4 14Jan2015 1065590124 #> 2 Ursus americanus -82.4 35.7 13Jan2015 1065588899 #> 3 Ursus americanus -99.1 23.7 20Feb2015 1098894889 #> 4 Ursus americanus -72.8 43.9 13Feb2015 1065611122 #> 5 Ursus americanus -72.3 43.9 01Mar2015 1088908315 #> 6 Ursus americanus -109. 32.7 29Mar2015 1088932238 #> 7 Ursus americanus -109. 32.7 29Mar2015 1088932273 #> 8 Ursus americanus -124. 40.1 28Mar2015 1132403409 #> 9 Ursus americanus -78.3 36.9 20Mar2015 1088923534 #> 10 Ursus americanus -76.8 35.5 05Apr2015 1088954559 #> # … with 1,490 more rows
dframe(df) %>% date_standardize("%Y")
#> # A tibble: 1,500 x 5 #> name longitude latitude date key #> <chr> <dbl> <dbl> <chr> <int> #> 1 Ursus americanus -79.7 38.4 2015 1065590124 #> 2 Ursus americanus -82.4 35.7 2015 1065588899 #> 3 Ursus americanus -99.1 23.7 2015 1098894889 #> 4 Ursus americanus -72.8 43.9 2015 1065611122 #> 5 Ursus americanus -72.3 43.9 2015 1088908315 #> 6 Ursus americanus -109. 32.7 2015 1088932238 #> 7 Ursus americanus -109. 32.7 2015 1088932273 #> 8 Ursus americanus -124. 40.1 2015 1132403409 #> 9 Ursus americanus -78.3 36.9 2015 1088923534 #> 10 Ursus americanus -76.8 35.5 2015 1088954559 #> # … with 1,490 more rows
dframe(df) %>% date_standardize("%y")
#> # A tibble: 1,500 x 5 #> name longitude latitude date key #> <chr> <dbl> <dbl> <chr> <int> #> 1 Ursus americanus -79.7 38.4 15 1065590124 #> 2 Ursus americanus -82.4 35.7 15 1065588899 #> 3 Ursus americanus -99.1 23.7 15 1098894889 #> 4 Ursus americanus -72.8 43.9 15 1065611122 #> 5 Ursus americanus -72.3 43.9 15 1088908315 #> 6 Ursus americanus -109. 32.7 15 1088932238 #> 7 Ursus americanus -109. 32.7 15 1088932273 #> 8 Ursus americanus -124. 40.1 15 1132403409 #> 9 Ursus americanus -78.3 36.9 15 1088923534 #> 10 Ursus americanus -76.8 35.5 15 1088954559 #> # … with 1,490 more rows
# drop records without dates
NROW(df)
#> [1] 1500
NROW(dframe(df) %>% date_missing())
#> [1] 1498
# Create date field from other fields
df <- sample_data_2
## NSE
dframe(df) %>% date_create(year, month, day)
#> # A tibble: 1,500 x 8 #> name longitude latitude key year month day date #> <chr> <dbl> <dbl> <int> <chr> <chr> <chr> <chr> #> 1 Ursus americanus -79.7 38.4 1065590124 2015 01 14 2015-01-14 #> 2 Ursus americanus -82.4 35.7 1065588899 2015 01 13 2015-01-13 #> 3 Ursus americanus -99.1 23.7 1098894889 2015 02 20 2015-02-20 #> 4 Ursus americanus -72.8 43.9 1065611122 2015 02 13 2015-02-13 #> 5 Ursus americanus -72.3 43.9 1088908315 2015 03 01 2015-03-01 #> 6 Ursus americanus -109. 32.7 1088932238 2015 03 29 2015-03-29 #> 7 Ursus americanus -109. 32.7 1088932273 2015 03 29 2015-03-29 #> 8 Ursus americanus -124. 40.1 1132403409 2015 03 28 2015-03-28 #> 9 Ursus americanus -78.3 36.9 1088923534 2015 03 20 2015-03-20 #> 10 Ursus americanus -76.8 35.5 1088954559 2015 04 05 2015-04-05 #> # … with 1,490 more rows
## SE
date_create_(dframe(df), "year", "month", "day")
#> # A tibble: 1,500 x 8 #> name longitude latitude key year month day date #> <chr> <dbl> <dbl> <int> <chr> <chr> <chr> <chr> #> 1 Ursus americanus -79.7 38.4 1065590124 2015 01 14 2015-01-14 #> 2 Ursus americanus -82.4 35.7 1065588899 2015 01 13 2015-01-13 #> 3 Ursus americanus -99.1 23.7 1098894889 2015 02 20 2015-02-20 #> 4 Ursus americanus -72.8 43.9 1065611122 2015 02 13 2015-02-13 #> 5 Ursus americanus -72.3 43.9 1088908315 2015 03 01 2015-03-01 #> 6 Ursus americanus -109. 32.7 1088932238 2015 03 29 2015-03-29 #> 7 Ursus americanus -109. 32.7 1088932273 2015 03 29 2015-03-29 #> 8 Ursus americanus -124. 40.1 1132403409 2015 03 28 2015-03-28 #> 9 Ursus americanus -78.3 36.9 1088923534 2015 03 20 2015-03-20 #> 10 Ursus americanus -76.8 35.5 1088954559 2015 04 05 2015-04-05 #> # … with 1,490 more rows