Merge pull request #125 from koheiw/remove-experimental

koheiw · web-flow · commit dd3dfbed82ba · 2026-04-06T15:50:42.000+09:00
Remove experimental functions
diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,7 @@
 ## Changes in v1.5.2
 
 * Add `nested_weight` to `textmodel_lss()` and `as.textmodel_lss()` to perform dictionary-like analysis.
-* Remove `auto_weight` from `textmodel_lss()`.
+* Remove `auto_weight` from `textmodel_lss()` and `cut` from `predict()`.
 
 ## Changes in v1.5.1
 
diff --git a/R/predict.R b/R/predict.R
@@ -5,19 +5,14 @@
 #' @param newdata a dfm on which prediction should be made.
 #' @param se_fit if `TRUE`, returns standard error of document scores.
 #' @param density if `TRUE`, returns frequency of polarity words in documents.
-#' @param cut a vector of one or two percentile values to dichotomized polarty
-#'   scores of words. When two values are given, words between them receive zero
-#'   polarity.
 #' @param rescale if `TRUE`, normalizes polarity scores using `scale()`.
 #' @param min_n set the minimum number of polarity words in documents.
 #' @param ... not used
 #' @details Polarity scores of documents are the means of polarity scores of
 #'   words weighted by their frequency. When `se_fit = TRUE`, this function
 #'   returns the weighted means, their standard errors, and the number of
 #'   polarity words in the documents. When `rescale = TRUE`, it converts the raw
-#'   polarity scores to z sores for easier interpretation. When `rescale =
-#'   FALSE` and `cut` is used, polarity scores of documents are bounded by
-#'   \[-1.0, 1.0\].
+#'   polarity scores to z sores for easier interpretation.
 #'
 #'   Documents tend to receive extreme polarity scores when they have only few
 #'   polarity words. This is problematic when LSS is applied to short documents
@@ -30,7 +25,7 @@
 #' @export
 predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE,
                                   density = FALSE, rescale = TRUE,
-                                  cut = NULL, min_n = 0L, ...){
+                                  min_n = 0L, ...){
 
 
     (function(se.fit, recaling, ...) unused_dots(...))(...) # trap deprecated args
@@ -45,11 +40,6 @@ predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE,
     }
     min_n <- check_integer(min_n, min = 0)
 
-    if (!is.null(cut)) {
-        cut <- check_double(cut, min = 0, max = 1, min_len = 1, max_len = 2)
-        object$beta <- cut_beta(object$beta, cut)
-    }
-
     beta <- Matrix(object$beta, nrow = 1, sparse = TRUE,
                    dimnames = list(NULL, names(object$beta)))
 
@@ -101,13 +91,4 @@ predict.textmodel_lss <- function(object, newdata = NULL, se_fit = FALSE,
     }
 }
 
-cut_beta <- function(x, p = 0.5) {
-    q <- c(-Inf, quantile(x, p, na.rm = TRUE), Inf)
-    v <- as.integer(cut(x, q))
-    beta <- double(length(x))
-    beta[v == min(v)] <- -1.0
-    beta[v == max(v)] <- 1.0
-    names(beta) <- names(x)
-    return(beta)
-}
 
diff --git a/man/predict.textmodel_lss.Rd b/man/predict.textmodel_lss.Rd
diff --git a/tests/testthat/test-textmodel_lss.R b/tests/testthat/test-textmodel_lss.R
@@ -399,78 +399,6 @@ test_that("se_fit is working", {
     expect_identical(pred1, pred2)
 })
 
-test_that("cut is working", {
-
-    skip_on_cran() # takes to much time
-
-    p0 <- predict(lss_test, rescale = TRUE, min_n = 10)
-    p1 <- predict(lss_test, cut = 0.5, rescale = TRUE)
-    expect_true(min(p1, na.rm = TRUE) < -1)
-    expect_true(max(p1, na.rm = TRUE) > 1)
-    expect_equal(cor(p0, p1, use = "pair"), 0.59, tolerance = 0.01)
-
-    p2 <- predict(lss_test, cut = 0.5, rescale = FALSE)
-    expect_true(min(p2, na.rm = TRUE) >= -1)
-    expect_true(max(p2, na.rm = TRUE) <= 1)
-    expect_equal(cor(p0, p2, use = "pair"), 0.59, tolerance = 0.01)
-
-    p3 <- predict(lss_test, cut = 0.5, rescale = FALSE, min_n = 10)
-    expect_true(min(p3, na.rm = TRUE) >= -1)
-    expect_true(max(p3, na.rm = TRUE) <= 1)
-    expect_equal(cor(p0, p3, use = "pair"), 0.73, tolerance = 0.01)
-
-    p4 <- predict(lss_test, cut = 0.75, rescale = FALSE, min_n = 10)
-    expect_true(min(p4, na.rm = TRUE) >= -1)
-    expect_true(max(p4, na.rm = TRUE) <= 1)
-    expect_equal(cor(p0, p4, use = "pair"), 0.33, tolerance = 0.01)
-
-    p5 <- predict(lss_test, cut = c(0.25, 0.75), rescale = FALSE, min_n = 10)
-    expect_true(min(p5, na.rm = TRUE) >= -1)
-    expect_true(max(p5, na.rm = TRUE) <= 1)
-    expect_equal(cor(p0, p5, use = "pair"), 0.77, tolerance = 0.01)
-
-    p6 <- predict(lss_test, cut = c(0.75, 0.25), rescale = FALSE, min_n = 10)
-    expect_identical(p5, p6)
-
-    expect_error(
-        predict(lss_test, cut = 1.5),
-        "The value of cut must be between 0 and 1"
-    )
-    expect_error(
-        predict(lss_test, cut = -0.1),
-        "The value of cut must be between 0 and 1"
-    )
-    expect_error(
-        predict(lss_test, cut = c(0.1, 0.5, 0.9)),
-        "The length of cut must be between 1 and 2"
-    )
-
-    expect_equal(
-        LSX:::cut_beta(c(1.1, -1.2, 0.5, 0.3, -0.2, -0.5)),
-        c(1, -1, 1, 1, -1, -1)
-    )
-    expect_equal(
-        LSX:::cut_beta(c(1.1, -1.2, 0.5, 0.3, -0.2, -0.5), c(0.2, 0.8)),
-        c(1, -1, 0, 0, 0, -1)
-    )
-
-    beta <- rnorm(nfeat(dfmt_test), sd = 0.1)
-    names(beta) <- featnames(dfmt_test)
-    beta2 <- LSX:::cut_beta(beta, c(0.2, 0.8))
-
-    lss1 <- as.textmodel_lss(beta)
-    lss2 <- as.textmodel_lss(beta2)
-    expect_equal(names(lss1$beta), names(lss2$beta))
-
-    pred0 <- predict(lss1, dfmt_test, se_fit = TRUE)
-    pred1 <- predict(lss1, dfmt_test, cut = c(0.2, 0.8), se_fit = TRUE)
-    pred2 <- predict(lss2, dfmt_test, se_fit = TRUE)
-
-    expect_equal(pred0$n, pred1$n)
-    expect_equal(pred0$n, pred2$n)
-    expect_equal(pred1$fit, pred2$fit)
-})
-
 test_that("rescaling still works", {
 
     expect_warning({