Skip to contents

Transform ISD data variables

Usage

isd_transform(x)

Arguments

x

(data.frame/tbl_df or list) a data.frame/tbl returned by isd_parse or isd_parse_line, or a list returned by isd_parse_line with as_data_frame = FALSE

Value

A tibble (data.frame) or list, matching the type of the input x

Details

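This function helps you clean your ISD data. isd_parse and isd_parse_line return the data unmodified, with variables as character strings. However, you'll likely want to transform some of the variables: changing the variable class (character to numeric), accounting for the scaling factor (for example, latitude is stored multiplied by 1000, so it must be divided by 1000 to recover degrees), and handling missing values (unfortunately, missing value standards vary across ISD data). Note that, as the examples below show, missing-value codes are scaled like any other value rather than converted to NA (for example, a temperature of "+9999" becomes 999.9).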

operations performed

  • divide latitude by its scaling factor of 1000

  • divide longitude by its scaling factor of 1000

  • divide elevation by its scaling factor of 10

  • divide wind speed by its scaling factor of 10

  • divide temperature by its scaling factor of 10

  • divide temperature dewpoint by its scaling factor of 10

  • divide air pressure by its scaling factor of 10

  • divide precipitation by its scaling factor of 10

  • convert date to a Date class with as.Date

  • change wind direction to numeric

  • change total characters to numeric

Examples

path <- system.file('extdata/104270-99999-1928.gz', package = "isdparser")
(res <- isd_parse(path))
#> <path>/usr/lib/R/site-library/isdparser/extdata/104270-99999-1928.gz
#> # A tibble: 376 × 31
#>    total_chars usaf_station wban_station date     time  date_flag latitude
#>    <chr>       <chr>        <chr>        <chr>    <chr> <chr>     <chr>   
#>  1 0076        104270       99999        19280401 0600  4         +51183  
#>  2 0067        104270       99999        19280402 0600  4         +51183  
#>  3 0067        104270       99999        19280403 0600  4         +51183  
#>  4 0089        104270       99999        19280423 0600  4         +51183  
#>  5 0067        104270       99999        19280501 0600  4         +51183  
#>  6 0043        104270       99999        19280501 1200  4         +51183  
#>  7 0085        104270       99999        19280502 0600  4         +51183  
#>  8 0056        104270       99999        19280502 1200  4         +51183  
#>  9 0050        104270       99999        19280503 0600  4         +51183  
#> 10 0037        104270       99999        19280503 1200  4         +51183  
#> # … with 366 more rows, and 24 more variables: longitude <chr>,
#> #   type_code <chr>, elevation <chr>, call_letter <chr>, quality <chr>,
#> #   wind_direction <chr>, wind_direction_quality <chr>, wind_code <chr>,
#> #   wind_speed <chr>, wind_speed_quality <chr>, ceiling_height <chr>,
#> #   ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> #   ceiling_height_cavok <chr>, visibility_distance <chr>,
#> #   visibility_distance_quality <chr>, visibility_code <chr>, …
isd_transform(res)
#> # A tibble: 376 × 31
#>    total_chars usaf_station wban_station date       time  date_flag latitude
#>          <dbl> <chr>        <chr>        <date>     <chr> <chr>        <dbl>
#>  1          76 104270       99999        1928-04-01 0600  4             51.2
#>  2          67 104270       99999        1928-04-02 0600  4             51.2
#>  3          67 104270       99999        1928-04-03 0600  4             51.2
#>  4          89 104270       99999        1928-04-23 0600  4             51.2
#>  5          67 104270       99999        1928-05-01 0600  4             51.2
#>  6          43 104270       99999        1928-05-01 1200  4             51.2
#>  7          85 104270       99999        1928-05-02 0600  4             51.2
#>  8          56 104270       99999        1928-05-02 1200  4             51.2
#>  9          50 104270       99999        1928-05-03 0600  4             51.2
#> 10          37 104270       99999        1928-05-03 1200  4             51.2
#> # … with 366 more rows, and 24 more variables: longitude <dbl>,
#> #   type_code <chr>, elevation <dbl>, call_letter <chr>, quality <chr>,
#> #   wind_direction <dbl>, wind_direction_quality <chr>, wind_code <chr>,
#> #   wind_speed <dbl>, wind_speed_quality <chr>, ceiling_height <chr>,
#> #   ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> #   ceiling_height_cavok <chr>, visibility_distance <chr>,
#> #   visibility_distance_quality <chr>, visibility_code <chr>, …

lns <- readLines(path, encoding = "latin1")
# data.frame
(res <- isd_parse_line(lns[1]))
#> # A tibble: 1 × 31
#>   total_chars usaf_station wban_station date  time  date_flag latitude longitude
#>   <chr>       <chr>        <chr>        <chr> <chr> <chr>     <chr>    <chr>    
#> 1 0076        104270       99999        1928… 0600  4         +51183   +008483  
#> # … with 23 more variables: type_code <chr>, elevation <chr>,
#> #   call_letter <chr>, quality <chr>, wind_direction <chr>,
#> #   wind_direction_quality <chr>, wind_code <chr>, wind_speed <chr>,
#> #   wind_speed_quality <chr>, ceiling_height <chr>,
#> #   ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> #   ceiling_height_cavok <chr>, visibility_distance <chr>,
#> #   visibility_distance_quality <chr>, visibility_code <chr>, …
isd_transform(res)
#> # A tibble: 1 × 31
#>   total_chars usaf_station wban_station date       time  date_flag latitude
#>         <dbl> <chr>        <chr>        <date>     <chr> <chr>        <dbl>
#> 1          76 104270       99999        1928-04-01 0600  4             51.2
#> # … with 24 more variables: longitude <dbl>, type_code <chr>, elevation <dbl>,
#> #   call_letter <chr>, quality <chr>, wind_direction <dbl>,
#> #   wind_direction_quality <chr>, wind_code <chr>, wind_speed <dbl>,
#> #   wind_speed_quality <chr>, ceiling_height <chr>,
#> #   ceiling_height_quality <chr>, ceiling_height_determination <chr>,
#> #   ceiling_height_cavok <chr>, visibility_distance <chr>,
#> #   visibility_distance_quality <chr>, visibility_code <chr>, …
# list
(res <- isd_parse_line(lns[1], as_data_frame = FALSE))
#> $total_chars
#> [1] "0076"
#> 
#> $usaf_station
#> [1] "104270"
#> 
#> $wban_station
#> [1] "99999"
#> 
#> $date
#> [1] "19280401"
#> 
#> $time
#> [1] "0600"
#> 
#> $date_flag
#> [1] "4"
#> 
#> $latitude
#> [1] "+51183"
#> 
#> $longitude
#> [1] "+008483"
#> 
#> $type_code
#> [1] "FM-12"
#> 
#> $elevation
#> [1] "+0257"
#> 
#> $call_letter
#> [1] "99999"
#> 
#> $quality
#> [1] "V020"
#> 
#> $wind_direction
#> [1] "999"
#> 
#> $wind_direction_quality
#> [1] "9"
#> 
#> $wind_code
#> [1] "9"
#> 
#> $wind_speed
#> [1] "0046"
#> 
#> $wind_speed_quality
#> [1] "1"
#> 
#> $ceiling_height
#> [1] "99999"
#> 
#> $ceiling_height_quality
#> [1] "9"
#> 
#> $ceiling_height_determination
#> [1] "9"
#> 
#> $ceiling_height_cavok
#> [1] "N"
#> 
#> $visibility_distance
#> [1] "000000"
#> 
#> $visibility_distance_quality
#> [1] "1"
#> 
#> $visibility_code
#> [1] "N"
#> 
#> $visibility_code_quality
#> [1] "9"
#> 
#> $temperature
#> [1] "+9999"
#> 
#> $temperature_quality
#> [1] "9"
#> 
#> $temperature_dewpoint
#> [1] "+9999"
#> 
#> $temperature_dewpoint_quality
#> [1] "9"
#> 
#> $air_pressure
#> [1] "99999"
#> 
#> $air_pressure_quality
#> [1] "9"
#> 
isd_transform(res)
#> $total_chars
#> [1] 76
#> 
#> $usaf_station
#> [1] "104270"
#> 
#> $wban_station
#> [1] "99999"
#> 
#> $date
#> [1] "1928-04-01"
#> 
#> $time
#> [1] "0600"
#> 
#> $date_flag
#> [1] "4"
#> 
#> $latitude
#> [1] 51.183
#> 
#> $longitude
#> [1] 8.483
#> 
#> $type_code
#> [1] "FM-12"
#> 
#> $elevation
#> [1] 25.7
#> 
#> $call_letter
#> [1] "99999"
#> 
#> $quality
#> [1] "V020"
#> 
#> $wind_direction
#> [1] 999
#> 
#> $wind_direction_quality
#> [1] "9"
#> 
#> $wind_code
#> [1] "9"
#> 
#> $wind_speed
#> [1] 4.6
#> 
#> $wind_speed_quality
#> [1] "1"
#> 
#> $ceiling_height
#> [1] "99999"
#> 
#> $ceiling_height_quality
#> [1] "9"
#> 
#> $ceiling_height_determination
#> [1] "9"
#> 
#> $ceiling_height_cavok
#> [1] "N"
#> 
#> $visibility_distance
#> [1] "000000"
#> 
#> $visibility_distance_quality
#> [1] "1"
#> 
#> $visibility_code
#> [1] "N"
#> 
#> $visibility_code_quality
#> [1] "9"
#> 
#> $temperature
#> [1] 999.9
#> 
#> $temperature_quality
#> [1] "9"
#> 
#> $temperature_dewpoint
#> [1] 999.9
#> 
#> $temperature_dewpoint_quality
#> [1] "9"
#> 
#> $air_pressure
#> [1] 9999.9
#> 
#> $air_pressure_quality
#> [1] "9"
#>