@@ -152,6 +152,9 @@ write_dfb_file <- function (txt, f, zip=TRUE,
152152# ' decimal place, yielding a somewhat sparser doc-topics matrix (the topic-word
153153# ' matrix is more aggressively truncated anyway). Set to NULL for no rounding.
154154# ' Rounded weights are renormalized within dfr-browser itself.
155+ # ' @param permute if non-NULL, specifies a renumbering of the topics: the new
156+ # ' topic \code{k} is old topic \code{permute[k]}. (If you have the inverse, use
157+ # ' \code{\link{order}(permute)} to invert it back.)
155158# '
156159# ' @examples
157160# '
@@ -185,7 +188,8 @@ export_browser_data <- function (m, out_dir, zipped=TRUE,
185188 internalize = FALSE ,
186189 info = NULL ,
187190 proper = FALSE ,
188- digits = getOption(" digits" )) {
191+ digits = getOption(" digits" ),
192+ permute = NULL ) {
189193 if (! requireNamespace(" jsonlite" , quietly = TRUE )) {
190194 stop(" jsonlite package required for browser export. Install from CRAN." )
191195 }
@@ -237,6 +241,12 @@ Set overwrite=TRUE to overwrite existing files."
237241 }
238242 }
239243
244+ # validate permute
245+ if (! is.null(permute ) && ! identical(sort(permute ), 1 : n_topics(m ))) {
246+ warning(" ignoring invalid permute parameter" )
247+ permute <- NULL
248+ }
249+
240250 if (proper ) {
241251 keys <- top_words(m , n_top_words , tw_smooth_normalize(m ))
242252 if (! is.null(keys ) && is.numeric(digits )) {
@@ -252,7 +262,8 @@ Set overwrite=TRUE to overwrite existing files."
252262 alpha = hyperparameters(m )$ alpha ,
253263 digits = digits , # irrelevant unless proper is TRUE
254264 overwrite = overwrite || internalize ,
255- index = index
265+ index = index ,
266+ permute
256267 )
257268 } else {
258269 warning(" Topic top words unavailable; unable to write tw.json" )
@@ -276,7 +287,8 @@ Set overwrite=TRUE to overwrite existing files."
276287 dtm = dtm ,
277288 digits = digits , # irrelevant unless proper is TRUE
278289 zip = zipped ,
279- overwrite = overwrite || internalize , index = index
290+ overwrite = overwrite || internalize , index = index ,
291+ permute
280292 )
281293 } else {
282294 warning(" Document topics unavailable; unable to write dt.json.zip" )
@@ -318,7 +330,8 @@ display may not work as expected. See ?export_browser_data for details."
318330 file = paste0(file.path(out_dir , " topic_scaled" ), " .csv" ),
319331 scaled = topic_scaled_2d(m , n_scaled_words ),
320332 overwrite = overwrite || internalize ,
321- index = index
333+ index = index ,
334+ permute
322335 )
323336 } else {
324337 warning(
@@ -362,13 +375,20 @@ display may not work as expected. See ?export_browser_data for details."
362375# ' @param overwrite clobber existing file?
363376# ' @param index if non-NULL, output is assumed to go into an element with ID
364377# ' \code{m__DATA__tw} in an HTML file at this path. \code{file} is ignored.
378+ # ' @param permute if non-NULL, exported topic \code{k} will correspond to the
379+ # ' topic numbered \code{permute[k]} in the data
365380# '
366381# ' @seealso \code{\link{export_browser_data}} for a more automated export of
367382# ' all model information at once
368383# ' @export
369384# '
370385export_browser_topic_words <- function (file , keys , alpha , digits = 4 ,
371- overwrite , index ) {
386+ overwrite , index ,
387+ permute ) {
388+ if (! is.null(permute )) {
389+ keys $ topic <- match(keys $ topic , permute )
390+ alpha <- alpha [permute ]
391+ }
372392 keys <- dplyr :: arrange_(keys , ~ topic , ~ desc(weight ))
373393 n_top_words <- nrow(keys ) / length(alpha )
374394 if (! is.null(index )) {
@@ -403,14 +423,19 @@ export_browser_topic_words <- function (file, keys, alpha, digits=4,
403423# ' @param overwrite clobber existing file?
404424# ' @param index if non-NULL, output is assumed to go into an element with ID
405425# ' \code{m__DATA__dt} in an HTML file at this path. \code{file} is ignored.
426+ # ' @param permute if non-NULL, exported topic \code{k} will correspond to the
427+ # ' topic numbered \code{permute[k]} in the data
406428# '
407429# ' @seealso \code{\link{export_browser_data}} for a more automated export of
408430# ' all model information at once
409431# ' @export
410432# '
411433export_browser_doc_topics <- function (file , dtm , digits = 4 ,
412- zipped , overwrite , index ) {
434+ zipped , overwrite , index , permute ) {
413435 dtm <- as(dtm , " CsparseMatrix" )
436+ if (! is.null(permute )) {
437+ dtm <- dtm [ , permute ]
438+ }
414439 if (! is.null(index )) {
415440 file <- " dt.json"
416441 }
@@ -467,15 +492,21 @@ export_browser_metadata <- function (file, meta, zipped, overwrite, index) {
467492# ' @param index if non-NULL, output is assumed to go into an element with ID
468493# ' \code{m__DATA__topic_scaled} in an HTML file at this path. \code{file} is
469494# ' ignored.
495+ # ' @param permute if non-NULL, exported topic \code{k} will correspond to the
496+ # ' topic numbered \code{permute[k]} in the data
470497# '
471498# ' @seealso \code{\link{export_browser_data}} for a more automated export of
472499# ' all model information at once
473500# ' @export
474501# '
475- export_browser_topic_scaled <- function (file , scaled , overwrite , index ) {
502+ export_browser_topic_scaled <- function (file , scaled , overwrite , index ,
503+ permute ) {
476504 if (! is.null(index )) {
477505 file <- " topic_scaled.csv"
478506 }
507+ if (! is.null(permute )) {
508+ scaled <- scaled [permute ]
509+ }
479510 write_dfb_file(capture.output(
480511 write.table(scaled , quote = FALSE , sep = " ," , row.names = FALSE ,
481512 col.names = FALSE )
@@ -547,8 +578,8 @@ export_browser_info <- function (file, info, overwrite, index) {
547578# ' @param internalize if TRUE, model data is in the browser home page rather
548579# ' than separate files. See Details.
549580# ' @param ... passed on to \code{\link{export_browser_data}}, q.v., especially
550- # ' the parameters \code{overwrite}, \code{n_scaled_words}, \code{info}, and
551- # ' \code{proper}
581+ # ' the parameters \code{overwrite}, \code{n_scaled_words}, \code{info},
582+ # ' \code{proper}, and \code{permute}
552583# '
553584# ' @seealso \code{\link{export_browser_data}} which does the work of exporting
554585# ' files, \code{\link{model_dfr_documents}}, \code{\link{train_model}},
@@ -561,6 +592,14 @@ export_browser_info <- function (file, info, overwrite, index) {
561592# ' "stoplist.txt", n_topics=40)
562593# ' # launch browser
563594# ' dfr_browser(m)
595+ # '
596+ # ' # generate a second model and align its topics with the first for more
597+ # ' # convenient comparisons
598+ # ' m2 <- model_dfr_documents("citations.CSV", "wordcounts",
599+ # ' "stoplist.txt", n_topics=40)
600+ # ' cl <- model_distances(list(m, m2), n_words=40) %>% align_topics()
601+ # ' dfr_browser(m2, permute=match(cl$clusters[[1]], cl$clusters[[2]])))
602+ # '
564603# ' }
565604# '
566605# ' @export
0 commit comments