Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# haven (development version)

* `col_select` in the `read_*()` functions now correctly implements the
tidyselect interface. Columns will be returned in the order specified in
`col_select` and can be renamed, e.g. `col_select = c(new = old)` (#685).

* `col_select` works correctly when combined with `.name_repair` for files with
duplicate column names (#687).

* `write_*()` functions now take into account the width of value labels when
calculating string variable widths. Previously it was possible to create
value label sets with values that were wider than the string variable, causing
Expand Down
40 changes: 20 additions & 20 deletions R/cpp11.R
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
# Generated by cpp11: do not edit by hand

df_parse_sas_file <- function(spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_sas_file`, spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip, name_repair)
df_parse_sas_file <- function(spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_sas_file`, spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip)
}

df_parse_sas_raw <- function(spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_sas_raw`, spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip, name_repair)
df_parse_sas_raw <- function(spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_sas_raw`, spec_b7dat, spec_b7cat, encoding, catalog_encoding, cols_skip, n_max, rows_skip)
}

df_parse_xpt_file <- function(spec, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_xpt_file`, spec, cols_skip, n_max, rows_skip, name_repair)
df_parse_xpt_file <- function(spec, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_xpt_file`, spec, cols_skip, n_max, rows_skip)
}

df_parse_xpt_raw <- function(spec, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_xpt_raw`, spec, cols_skip, n_max, rows_skip, name_repair)
df_parse_xpt_raw <- function(spec, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_xpt_raw`, spec, cols_skip, n_max, rows_skip)
}

df_parse_dta_file <- function(spec, encoding, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_dta_file`, spec, encoding, cols_skip, n_max, rows_skip, name_repair)
df_parse_dta_file <- function(spec, encoding, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_dta_file`, spec, encoding, cols_skip, n_max, rows_skip)
}

df_parse_dta_raw <- function(spec, encoding, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_dta_raw`, spec, encoding, cols_skip, n_max, rows_skip, name_repair)
df_parse_dta_raw <- function(spec, encoding, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_dta_raw`, spec, encoding, cols_skip, n_max, rows_skip)
}

df_parse_sav_file <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_sav_file`, spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair)
df_parse_sav_file <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_sav_file`, spec, encoding, user_na, cols_skip, n_max, rows_skip)
}

df_parse_sav_raw <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_sav_raw`, spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair)
df_parse_sav_raw <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_sav_raw`, spec, encoding, user_na, cols_skip, n_max, rows_skip)
}

df_parse_por_file <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_por_file`, spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair)
df_parse_por_file <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_por_file`, spec, encoding, user_na, cols_skip, n_max, rows_skip)
}

df_parse_por_raw <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair) {
.Call(`_haven_df_parse_por_raw`, spec, encoding, user_na, cols_skip, n_max, rows_skip, name_repair)
df_parse_por_raw <- function(spec, encoding, user_na, cols_skip, n_max, rows_skip) {
.Call(`_haven_df_parse_por_raw`, spec, encoding, user_na, cols_skip, n_max, rows_skip)
}

write_sav_ <- function(data, path, compress) {
Expand Down
36 changes: 22 additions & 14 deletions R/haven-sas.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@
#' encoding specified in the file; use this argument to override it if it is
#' incorrect.
#' @inheritParams tibble::as_tibble
#' @param col_select One or more selection expressions, like in
#' [dplyr::select()]. Use `c()` or `list()` to use more than one expression.
#' See `?dplyr::select` for details on available selection options. Only the
#' specified columns will be read from `data_file`.
#' @param col_select Columns to include in the results. You can use the same
#' mini-language as `dplyr::select()` to refer to the columns by name. Use
#' `c()` to use more than one selection expression. Although this
#' usage is less common, `col_select` also accepts a numeric column index. See
#' [`?tidyselect::language`][tidyselect::language] for full details on the
#' selection language.
#'
#' Predicates using [`where()`][tidyselect::where] are not supported.
#' @param skip Number of lines to skip before reading data.
#' @param n_max Maximum number of lines to read.
#' @param cols_only `r lifecycle::badge("deprecated")` `cols_only` is no longer
Expand Down Expand Up @@ -45,21 +49,23 @@ read_sas <- function(data_file, catalog_file = NULL,
encoding <- ""
}

cols_skip <- skip_cols(read_sas, !!col_select, data_file, encoding = encoding)
spec_data <- readr::datasource(data_file)
cols <- select_cols(read_sas, !!col_select, spec_data, encoding = encoding, .name_repair = .name_repair)
n_max <- validate_n_max(n_max)

spec_data <- readr::datasource(data_file)
if (is.null(catalog_file)) {
spec_cat <- list()
} else {
spec_cat <- readr::datasource(catalog_file)
}

switch(class(spec_data)[1],
source_file = df_parse_sas_file(spec_data, spec_cat, encoding = encoding, catalog_encoding = catalog_encoding, cols_skip = cols_skip, n_max = n_max, rows_skip = skip, name_repair = .name_repair),
source_raw = df_parse_sas_raw(spec_data, spec_cat, encoding = encoding, catalog_encoding = catalog_encoding, cols_skip = cols_skip, n_max = n_max, rows_skip = skip, name_repair = .name_repair),
data <- switch(class(spec_data)[1],
source_file = df_parse_sas_file(spec_data, spec_cat, encoding = encoding, catalog_encoding = catalog_encoding, cols_skip = cols$skip, n_max = n_max, rows_skip = skip),
source_raw = df_parse_sas_raw(spec_data, spec_cat, encoding = encoding, catalog_encoding = catalog_encoding, cols_skip = cols$skip, n_max = n_max, rows_skip = skip),
cli_abort("This kind of input is not handled.")
)

output_cols(data, cols, .name_repair)
}

#' Write SAS files
Expand Down Expand Up @@ -119,15 +125,17 @@ write_sas <- function(data, path) {
#' write_xpt(mtcars, tmp)
#' read_xpt(tmp)
read_xpt <- function(file, col_select = NULL, skip = 0, n_max = Inf, .name_repair = "unique") {
cols_skip <- skip_cols(read_xpt, {{ col_select }}, file)
spec <- readr::datasource(file)
cols <- select_cols(read_xpt, {{ col_select }}, spec, .name_repair = .name_repair)
n_max <- validate_n_max(n_max)

spec <- readr::datasource(file)
switch(class(spec)[1],
source_file = df_parse_xpt_file(spec, cols_skip, n_max, skip, name_repair = .name_repair),
source_raw = df_parse_xpt_raw(spec, cols_skip, n_max, skip, name_repair = .name_repair),
data <- switch(class(spec)[1],
source_file = df_parse_xpt_file(spec, cols$skip, n_max, skip),
source_raw = df_parse_xpt_raw(spec, cols$skip, n_max, skip),
cli_abort("This kind of input is not handled.")
)

output_cols(data, cols, .name_repair)
}

#' @export
Expand Down
24 changes: 14 additions & 10 deletions R/haven-spss.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,33 @@ read_sav <- function(file, encoding = NULL, user_na = FALSE, col_select = NULL,
encoding <- ""
}

cols_skip <- skip_cols(read_sav, {{ col_select }}, file, encoding)
spec <- readr::datasource(file)
cols <- select_cols(read_sav, {{ col_select }}, spec, encoding, .name_repair = .name_repair)
n_max <- validate_n_max(n_max)

spec <- readr::datasource(file)
switch(class(spec)[1],
source_file = df_parse_sav_file(spec, encoding, user_na, cols_skip, n_max, skip, name_repair = .name_repair),
source_raw = df_parse_sav_raw(spec, encoding, user_na, cols_skip, n_max, skip, name_repair = .name_repair),
data <- switch(class(spec)[1],
source_file = df_parse_sav_file(spec, encoding, user_na, cols$skip, n_max, skip),
source_raw = df_parse_sav_raw(spec, encoding, user_na, cols$skip, n_max, skip),
cli_abort("This kind of input is not handled.")
)

output_cols(data, cols, .name_repair)
}

#' @export
#' @rdname read_spss
read_por <- function(file, user_na = FALSE, col_select = NULL, skip = 0, n_max = Inf, .name_repair = "unique") {
cols_skip <- skip_cols(read_por, {{ col_select }}, file)
spec <- readr::datasource(file)
cols <- select_cols(read_por, {{ col_select }}, spec, .name_repair = .name_repair)
n_max <- validate_n_max(n_max)

spec <- readr::datasource(file)
switch(class(spec)[1],
source_file = df_parse_por_file(spec, encoding = "", user_na = user_na, cols_skip, n_max, skip, name_repair = .name_repair),
source_raw = df_parse_por_raw(spec, encoding = "", user_na = user_na, cols_skip, n_max, skip, name_repair = .name_repair),
data <- switch(class(spec)[1],
source_file = df_parse_por_file(spec, encoding = "", user_na = user_na, cols$skip, n_max, skip),
source_raw = df_parse_por_raw(spec, encoding = "", user_na = user_na, cols$skip, n_max, skip),
cli_abort("This kind of input is not handled.")
)

output_cols(data, cols, .name_repair)
}

#' @export
Expand Down
12 changes: 7 additions & 5 deletions R/haven-stata.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,17 @@ read_dta <- function(file, encoding = NULL, col_select = NULL, skip = 0, n_max =
encoding <- ""
}

cols_skip <- skip_cols(read_dta, {{ col_select }}, file, encoding)
spec <- readr::datasource(file)
cols <- select_cols(read_dta, {{ col_select }}, spec, encoding, .name_repair = .name_repair)
n_max <- validate_n_max(n_max)

spec <- readr::datasource(file)
switch(class(spec)[1],
source_file = df_parse_dta_file(spec, encoding, cols_skip, n_max, skip, name_repair = .name_repair),
source_raw = df_parse_dta_raw(spec, encoding, cols_skip, n_max, skip, name_repair = .name_repair),
data <- switch(class(spec)[1],
source_file = df_parse_dta_file(spec, encoding, cols$skip, n_max, skip),
source_raw = df_parse_dta_raw(spec, encoding, cols$skip, n_max, skip),
cli_abort("This kind of input is not handled.")
)

output_cols(data, cols, .name_repair)
}

#' @export
Expand Down
30 changes: 23 additions & 7 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,36 @@ force_utc <- function(x) {
}
}

skip_cols <- function(reader, col_select = NULL, ..., call = caller_env()) {
select_cols <- function(reader, col_select = NULL, ..., call = caller_env()) {
col_select <- enquo(col_select)
if (quo_is_null(col_select)) {
return(character())
return(list(pos = NULL, skip_index = integer()))
}

cols <- names(reader(..., n_max = 0L))
sels <- tidyselect::vars_select(cols, !!col_select)
data <- as.list(setNames(seq_along(cols), cols))

pos <- tidyselect::eval_select(
col_select,
data = data,
allow_rename = TRUE,
allow_empty = FALSE,
allow_predicates = FALSE,
error_call = call
)

list(
select = pos,
skip = as.integer(setdiff(seq_along(cols), pos) - 1L)
)
}

if (length(sels) == 0) {
cli_abort("Can't find any columns matching {.arg col_select} in data.", call = call)
output_cols <- function(data, cols, name_repair, call = caller_env()) {
if (is.null(cols$select)) {
set_names(data, vctrs::vec_as_names(names(data), repair = name_repair, call = call))
} else {
set_names(data[rank(cols$select)], names(cols$select))
}

setdiff(cols, sels)
}

validate_n_max <- function(n, call = caller_env()) {
Expand Down
12 changes: 8 additions & 4 deletions man/read_dta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions man/read_sas.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions man/read_spss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions man/read_xpt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading