Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 63 additions & 15 deletions R/textmodel_wordfish.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
#' tolerance in the difference in parameter values from the iterative
#' conditional maximum likelihood (from conditionally estimating
#' document-level, then feature-level parameters).
#' @param method specifies whether to use the Poisson model or the negative
#' binomial model
#' @param dim2 a boolean variable that specifies whether to estimate the second
#' dimension of theta and beta
#' @param prior_values NEEDS DOCUMENTING
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a description here.

#' @param dispersion sets whether a quasi-Poisson quasi-likelihood should be
#' used based on a single dispersion parameter (`"poisson"`), or
#' quasi-Poisson (`"quasipoisson"`)
Expand Down Expand Up @@ -75,14 +80,16 @@
#' \dontrun{
#' library("quanteda")
#' dfmat <- dfm(tokens(data_corpus_irishbudget2010))
#' (tmod2 <- textmodel_wordfish(dfmat, dir = c(6,5)))
#' (tmod3 <- textmodel_wordfish(dfmat, dir = c(6,5),
#' (tmod2 <- textmodel_wordfish(dfmat, dir = c(6, 5)))
#' (tmod3 <- textmodel_wordfish(dfmat, dir = c(6, 5),
#' dispersion = "quasipoisson", dispersion_floor = 0))
#' (tmod4 <- textmodel_wordfish(dfmat, dir = c(6,5),
#' (tmod4 <- textmodel_wordfish(dfmat, dir = c(6, 5),
#' dispersion = "quasipoisson", dispersion_floor = .5))
#' plot(tmod3$phi, tmod4$phi, xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
#' plot(tmod3$phi, tmod4$phi,
#' xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
#' xlim = c(0, 1.0), ylim = c(0, 1.0))
#' plot(tmod3$phi, tmod4$phi, xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
#' plot(tmod3$phi, tmod4$phi,
#' xlab = "Min underdispersion = 0", ylab = "Min underdispersion = .5",
#' xlim = c(0, 1.0), ylim = c(0, 1.0), type = "n")
#' underdispersedTerms <- sample(which(tmod3$phi < 1.0), 5)
#' which(featnames(dfmat) %in% names(topfeatures(dfmat, 20)))
Expand All @@ -100,6 +107,9 @@
textmodel_wordfish <- function(x, dir = c(1, 2),
priors = c(Inf, Inf, 3, 1),
tol = c(1e-6, 1e-8),
method = "Poisson",
dim2 = FALSE,
prior_values = NULL,
dispersion = c("poisson", "quasipoisson"),
dispersion_level = c("feature", "overall"),
dispersion_floor = 0,
Expand All @@ -114,6 +124,9 @@ textmodel_wordfish <- function(x, dir = c(1, 2),
textmodel_wordfish.default <- function(x, dir = c(1, 2),
priors = c(Inf, Inf, 3, 1),
tol = c(1e-6, 1e-8),
method = "Poisson",
dim2 = FALSE,
prior_values = NULL,
dispersion = c("poisson", "quasipoisson"),
dispersion_level = c("feature", "overall"),
dispersion_floor = 0,
Expand All @@ -128,6 +141,9 @@ textmodel_wordfish.default <- function(x, dir = c(1, 2),
textmodel_wordfish.dfm <- function(x, dir = c(1, 2),
priors = c(Inf, Inf, 3, 1),
tol = c(1e-6, 1e-8),
method = "Poisson",
dim2 = FALSE,
prior_values = NULL,
dispersion = c("poisson", "quasipoisson"),
dispersion_level = c("feature", "overall"),
dispersion_floor = 0,
Expand Down Expand Up @@ -187,19 +203,41 @@ textmodel_wordfish.dfm <- function(x, dir = c(1, 2),
} else {
stop("Illegal option combination.")
}
if (sparse == TRUE) {

if (method == "Poisson") {

if (dim2 == TRUE) {
priors <- c(priors, 0.5)
}else if (dim2 == FALSE) {
priors <- c(priors, 1)
}
if (sparse == TRUE) {
result <- qatd_cpp_wordfish(x, as.integer(dir), 1 / (priors ^ 2),
tol, disp,
dispersion_floor, abs_err, svd_sparse,
residual_floor)
} else{
} else{
result <- qatd_cpp_wordfish_dense(as.matrix(x),
as.integer(dir), 1 / (priors ^ 2),
tol, disp,
dispersion_floor, abs_err)
}
# NOTE: psi is a 1 x nfeat matrix, not a numeric vector
# alpha is a ndoc x 1 matrix, not a numeric vector
}
# NOTE: psi is a 1 x nfeat matrix, not a numeric vector
# alpha is a ndoc x 1 matrix, not a numeric vector
}else if (method == "NB") {
if (dim2 == TRUE) {
priors <- c(priors, 0.01)
} else if (dim2 == FALSE) {
priors <- c(priors, 0.1)
}

result <- qatd_cpp_wordfish_dense(as.matrix(x),
as.integer(dir), 1 / (priors ^ 2),
tol, disp,
dispersion_floor, abs_err)
}


if (any(is.nan(result$theta)))
warning("Warning: The algorithm did not converge.")

Expand All @@ -215,13 +253,23 @@ textmodel_wordfish.dfm <- function(x, dir = c(1, 2),
psi = as.numeric(result$psi),
alpha = as.numeric(result$alpha),
phi = as.numeric(result$phi),
se.theta = as.numeric(result$thetaSE) ,
zeta = as.numeric(result$zeta),
theta2 = as.numeric(result$theta2),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably makes more sense for the 2D case to return theta as an ndoc x 2 matrix instead of as two vectors of length ndoc. Same for the other parameters and SEs.

beta2 = as.numeric(result$beta2),
se.alpha = as.numeric(result$alphaSE),
se.theta = as.numeric(result$thetaSE),
se.psi = as.numeric(result$psiSE),
se.beta = as.numeric(result$betaSE),
se.theta2 = as.numeric(result$theta2SE),
se.beta2 = as.numeric(result$beta2SE),
lp = as.numeric(result$LogL),
call = match.call()
)
class(result) <- c("textmodel_wordfish", "textmodel", "list")
result
}


#' Prediction from a textmodel_wordfish method
#'
#' `predict.textmodel_wordfish()` returns estimated document scores and
Expand Down Expand Up @@ -273,7 +321,7 @@ print.textmodel_wordfish <- function(x, ...) {
print(x$call)
cat("\n",
"Dispersion: ", x$dispersion, "; ",
"direction: ", x$dir[1], ' < ' , x$dir[2], "; ",
"direction: ", x$dir[1], " < ", x$dir[2], "; ",
ndoc(x), " documents; ",
nfeat(x), " features.",
"\n",
Expand All @@ -298,9 +346,9 @@ summary.textmodel_wordfish <- function(object, n = 30, ...) {
)

result <- list(
'call' = object$call,
'estimated.document.positions' = as.statistics_textmodel(stat),
'estimated.feature.scores' = as.coefficients_textmodel(head(coef(object)$features, n))
"call" = object$call,
"estimated.document.positions" = as.statistics_textmodel(stat),
"estimated.feature.scores" = as.coefficients_textmodel(head(coef(object)$features, n))
)
return(as.summary.textmodel(result))
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

predict will need modification for the 2D case.

Expand Down
19 changes: 16 additions & 3 deletions man/textmodel_wordfish.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading