diff --git a/NEWS.md b/NEWS.md index 397b6711..08c1d82c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ ISO 8061 convention where positive offset denotes time zone east of Greenwich. (#946) +* `varchar()` helper now sizes the column by the number of bytes rather than the number of characters, avoiding truncation of multibyte strings (#960) + # odbc 1.6.3 * Addressed a compiler warning on `r-devel-linux-x86_64-fedora-clang` (#941). diff --git a/R/aaa-odbc-data-type.R b/R/aaa-odbc-data-type.R index 9d46fa2f..9018d17d 100644 --- a/R/aaa-odbc-data-type.R +++ b/R/aaa-odbc-data-type.R @@ -118,7 +118,7 @@ is_blob <- function(obj) { varchar <- function(x, type = "varchar") { # at least 255 characters, use max if more than 8000: - max_length <- max(c(255, nchar(as.character(x))), na.rm = TRUE) + max_length <- max(c(255, nchar(as.character(x), type = "bytes")), na.rm = TRUE) if (max_length > 8000) { max_length <- "max" diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 7827298f..80c24c51 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -383,3 +383,9 @@ test_that("configure_unixodbc_simba() writes reasonable entries", { action = "warn" )) }) + +test_that("varchar() uses byte length (avoids truncation with multibyte)", { + # U+2019 RIGHT SINGLE QUOTATION MARK in UTF-8 is 3 bytes + string <- paste0("\xe2\x80\x99", paste(rep("a", 255), collapse = "")) + expect_equal(varchar(string, type = "varchar"), "varchar(258)") +})