read_vc() handles git2rdata objects stored by write_vc(). It reads and verifies the metadata file (.yml). Then it reads and verifies the raw data. The last step is back-transforming any transformation done by meta() to return the data.frame as stored by write_vc().

read_vc() is an S3 generic on root which currently handles "character" (a path) and "git-repository" (from git2r). S3 methods for other version control systems could be added.

read_vc(file, root = ".")

Arguments

file

The name of the git2rdata object. Git2rdata objects cannot have dots in their name. The name may include a relative path. file is a path relative to the root. Note that file must point to a location within root.

root

The root of a project. Can be a file path or a git-repository. Defaults to the current working directory (".").

Value

The data.frame with the file names and hashes as attributes.

See also

Other storage: list_data(), prune_meta(), relabel(), rm_data(), write_vc()

Examples

## on file system # create a directory root <- tempfile("git2rdata-") dir.create(root) # write a dataframe to the directory write_vc(iris[1:6, ], file = "iris", root = root, sorting = "Sepal.Length")
#> 09d5bfd6a65e682a4ca030c766348180861568c8 #> "iris.tsv" #> 0d434e56d22a710c99c5b912e8624d52abd41aaf #> "iris.yml"
# check that a data file (.tsv) and a metadata file (.yml) exist. list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# read the git2rdata object from the directory read_vc("iris", root)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 4.6 3.1 1.5 0.2 setosa #> 2 4.7 3.2 1.3 0.2 setosa #> 3 4.9 3.0 1.4 0.2 setosa #> 4 5.0 3.6 1.4 0.2 setosa #> 5 5.1 3.5 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa
# store a new version with different observations but the same metadata write_vc(iris[1:5, ], "iris", root)
#> 31ff841b58e569e8a4a4ac2f02152295c19f94db #> "iris.tsv" #> 0d434e56d22a710c99c5b912e8624d52abd41aaf #> "iris.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# Removing a column requires new metadata. # Add strict = FALSE to override the existing metadata. write_vc( iris[1:6, -2], "iris", root, sorting = "Sepal.Length", strict = FALSE )
#> Warning: Changes in the metadata may lead to unnecessarily large diffs. #> See vignette('version_control', package = 'git2rdata') for more information. #> #> - New data has a different number of variables. #> - Deleted variables: Sepal.Width.
#> b2098d507b0d749a86bb61a185ab2d31f7622418 #> "iris.tsv" #> 274646434951b078260e194a51f349a30777ebf2 #> "iris.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# storing the original version again requires another update of the metadata write_vc(iris[1:6, ], "iris", root, sorting = "Sepal.Width", strict = FALSE)
#> Warning: Changes in the metadata may lead to unnecessarily large diffs. #> See vignette('version_control', package = 'git2rdata') for more information. #> #> - The sorting variables changed. #> - Sorting for the new data: 'Sepal.Width'. #> - Sorting for the old data: 'Sepal.Length'. #> - New data has a different number of variables. #> - New variables: Sepal.Width.
#> 4045436d3a61801f4eaad5769e32726838deecbc #> "iris.tsv" #> 928750d3071a23b52b05b88f0c2cb6f10b09789d #> "iris.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# optimize = FALSE stores the data more verbosely. This requires larger files. write_vc( iris[1:6, ], "iris2", root, sorting = "Sepal.Width", optimize = FALSE )
#> cf69e9daed7abacb28aaa1bff7a98c9e61dd8eca #> "iris2.tsv" #> 65cc08c8736657fd3e523180b46353de368b22d1 #> "iris2.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml" "iris2.tsv" "iris2.yml"
## on git repo using a git2r::git-repository # initialise a git repo using the git2r package repo_path <- tempfile("git2rdata-repo-") dir.create(repo_path) repo <- git2r::init(repo_path) git2r::config(repo, user.name = "Alice", user.email = "alice@example.org") # store a dataframe in git repo. write_vc(iris[1:6, ], file = "iris", root = repo, sorting = "Sepal.Length")
#> 09d5bfd6a65e682a4ca030c766348180861568c8 #> "iris.tsv" #> 0d434e56d22a710c99c5b912e8624d52abd41aaf #> "iris.yml"
# This git2rdata object is not staged by default. status(repo)
#> Untracked files: #> Untracked: iris.tsv #> Untracked: iris.yml #>
# read a dataframe from a git repo read_vc("iris", repo)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 4.6 3.1 1.5 0.2 setosa #> 2 4.7 3.2 1.3 0.2 setosa #> 3 4.9 3.0 1.4 0.2 setosa #> 4 5.0 3.6 1.4 0.2 setosa #> 5 5.1 3.5 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa
# store a new version in the git repo and stage it in one go write_vc(iris[1:5, ], "iris", repo, stage = TRUE)
#> 31ff841b58e569e8a4a4ac2f02152295c19f94db #> "iris.tsv" #> 0d434e56d22a710c99c5b912e8624d52abd41aaf #> "iris.yml"
status(repo)
#> Staged changes: #> New: iris.tsv #> New: iris.yml #>
# store a verbose version in a different git2rdata object write_vc( iris[1:6, ], "iris2", repo, sorting = "Sepal.Width", optimize = FALSE )
#> cf69e9daed7abacb28aaa1bff7a98c9e61dd8eca #> "iris2.tsv" #> 65cc08c8736657fd3e523180b46353de368b22d1 #> "iris2.yml"
status(repo)
#> Untracked files: #> Untracked: iris2.tsv #> Untracked: iris2.yml #> #> Staged changes: #> New: iris.tsv #> New: iris.yml #>
# clean up junk <- file.remove( list.files(root, full.names = TRUE, recursive = TRUE), root) junk <- file.remove( rev(list.files(repo_path, full.names = TRUE, recursive = TRUE, include.dirs = TRUE, all.files = TRUE)), repo_path)