Skip to content

Commit b90a98a

Browse files
authored
Merge branch 'main' into col_select
2 parents 52c5bbe + 838956e commit b90a98a

File tree

4 files changed

+69
-15
lines changed

4 files changed

+69
-15
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66

77
* `col_select` works correctly when combined with `.name_repair` for files with
88
duplicate column names (#687).
9+
10+
* When a string variable has a date and/or time format `read_*()` functions now
11+
warn and treat the variable as a plain string instead of throwing an error
12+
(#747). This should not normally occur, but has been observed in files
13+
produced by 3rd party software.
914

1015
* `write_*()` functions now take into account the width of value labels when
1116
calculating string variable widths. Previously it was possible to create

R/update.R

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ update_readstat <- function(branch = "master") {
1717
fs::file_copy(fs::path(base, "LICENSE"), out_dir)
1818
fs::file_copy(fs::path(base, "NEWS"), out_dir)
1919

20+
apply_iconv_hack()
21+
2022
invisible()
2123
}
24+
25+
apply_iconv_hack <- function() {
26+
path <- fs::path("src", "readstat", "readstat_iconv.h")
27+
lines <- readLines(path)
28+
29+
# Replace the autotools ICONV_CONST fallback with platform-specific logic
30+
# Also update the comment to reflect that we're manually hacking this
31+
ifndef_line <- which(lines == "#ifndef ICONV_CONST")
32+
comment_line <- grep("^/\\* ICONV_CONST", lines)
33+
34+
if (length(ifndef_line) == 1 && length(comment_line) == 1) {
35+
lines <- c(
36+
lines[1:(comment_line - 1)],
37+
"/* ICONV_CONST defined by autotools; so we hack this in manually */",
38+
"#if defined(_WIN32) || defined(__sun)",
39+
" #define ICONV_CONST const",
40+
"#else",
41+
" #define ICONV_CONST",
42+
"#endif",
43+
lines[(ifndef_line + 3):length(lines)]
44+
)
45+
writeLines(lines, path)
46+
}
47+
}
2248
# nocov end

src/DfReader.cpp

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -230,20 +230,26 @@ class DfReader {
230230
VarType var_type = numType(vendor_, var_format);
231231
// Rcout << name << ": " << var_format << " [" << var_type << "]\n";
232232
var_types_[var_index] = var_type;
233-
switch(var_type) {
234-
case HAVEN_DATE:
235-
col.attr("class") = "Date";
236-
break;
237-
case HAVEN_TIME:
238-
col.attr("class") = {"hms", "difftime"};
239-
col.attr("units") = "secs";
240-
break;
241-
case HAVEN_DATETIME:
242-
col.attr("class") = {"POSIXct", "POSIXt"};
243-
col.attr("tzone") = "UTC";
244-
break;
245-
default:
246-
break;
233+
// Files have been observed in the wild with a date format applied to a string variable.
234+
// This should not be possible, so we throw a warning, don't apply a class and continue.
235+
if (var_type != HAVEN_DEFAULT && readstat_variable_get_type_class(variable) == READSTAT_TYPE_CLASS_STRING) {
236+
Rf_warning("String variable '%s' has incompatible format '%s' and will be returned as a regular string variable.", name, var_format);
237+
} else {
238+
switch (var_type) {
239+
case HAVEN_DATE:
240+
col.attr("class") = "Date";
241+
break;
242+
case HAVEN_TIME:
243+
col.attr("class") = {"hms", "difftime"};
244+
col.attr("units") = "secs";
245+
break;
246+
case HAVEN_DATETIME:
247+
col.attr("class") = {"POSIXct", "POSIXt"};
248+
col.attr("tzone") = "UTC";
249+
break;
250+
default:
251+
break;
252+
}
247253
}
248254

249255
// User defined missing values

tests/testthat/test-haven-sas.R

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ test_that("col_select works with .name_repair and renaming for duplicate names",
156156
write_xpt(df, path)
157157
df <- tibble::as_tibble(df, .name_repair = "universal")
158158

159-
# This previously crashed with "attempt to set index 1/1 in SET_STRING_ELT"
159+
# This previously crashed with "attempt to set index 1/1 in SET_STRING_ELT"
160160
res <- read_xpt(path, col_select = id...1, .name_repair = "universal")
161161
expect_equal(res, df[1])
162162

@@ -167,6 +167,23 @@ test_that("col_select works with .name_repair and renaming for duplicate names",
167167
# Test renaming
168168
res3 <- read_xpt(path, col_select = c(a = id...3, b = id...1), .name_repair = "universal")
169169
expect_equal(res3, set_names(df[c(3, 1)], c("a", "b")))
170+
}
171+
172+
test_that("date/times with character data throw a warning (#747)", {
173+
df = data.frame(
174+
id = 1:2,
175+
date = structure(c("20424", "20487"), label = "Date", class = "Date")
176+
) #would not work with tibble()
177+
178+
path <- tempfile()
179+
write_xpt(df, path)
180+
181+
expect_warning(
182+
out <- read_xpt(path),
183+
"will be returned as a regular string variable"
184+
)
185+
186+
expect_equal(out$date, structure(c("20424", "20487"), label = "Date", format.sas = "DATE"))
170187
})
171188

172189
# write_xpt ---------------------------------------------------------------

0 commit comments

Comments
 (0)