|
3 | 3 | #' @description Creates a data frame from an exported 'WhatsApp' chat log containing one row per message. Some columns |
4 | 4 | #' are saved as lists using the I() function so that multiple elements can be stored per message while still maintaining |
5 | 5 | #' the general structure of one row per message. These columns should be treated as lists or unlisted first. |
6 | | -#' @param path Character string containing the file path to the exported 'WhatsApp' chat log as a .txt file. |
7 | | -#' @param os Operating system of the phone the chat was exported from. Default "auto" tries to automatically detect the OS. Also supports "android" or "iOS". |
| 6 | +#' @param path Character string containing the file path to the exported 'WhatsApp' chat log as a .txt file or .zip folder. |
| 7 | +#' @param os Operating system of the phone the chat was exported from. Default "auto" tries to automatically detect the OS. Also supports "android" or "ios". |
8 | 8 | #' @param language Indicates the language setting of the phone with which the messages were exported. Default is "auto", which tries to match either 'English' or 'German'. More languages might be supported in the future. |
9 | 9 | #' @param anonymize TRUE results in the vector of sender names being anonymized and columns containing personally identifiable information being deleted or restricted, FALSE displays the actual names and all content, "add" adds |
10 | 10 | #' anonymized columns to the full info columns. Do not blindly trust this and always double check. |
@@ -42,15 +42,30 @@ parse_chat <- function(path, |
42 | 42 |
|
43 | 43 | # Input checking |
44 | 44 | if (!file.exists(path)) {stop("'path' must be a valid file path to an exported 'WhatsApp' chatlog in .txt format")} |
45 | | - if (!(os == "auto" | os == "android" | os == "android")) {stop("'os' must either be 'android','ios', or 'auto'")} |
| 45 | + if (!(os == "auto" | os == "android" | os == "ios")) {stop("'os' must either be 'android','ios', or 'auto'")} |
46 | 46 | if (!(language == "auto" | language == "english" | language == "german")) {stop("'language' must be either 'english', 'german', or 'auto'")} |
47 | 47 | if (!(is.logical(anonymize) | anonymize == "add")) {stop("'anonymize' must be either TRUE, FALSE, or 'add'")} |
48 | | - if (!(is.character(consent) | is.na(consent))) {stop("'consent' must bei either NA or a character vector")} |
| 48 | + if (!(is.character(consent) | is.na(consent))) {stop("'consent' must be either NA or a character vector")} |
49 | 49 | if (!(emoji_dictionary == "internal" | file.exists(emoji_dictionary))) {stop("'emoji_dictionary' must be 'internal' or valid path to a dictionary scraped using download_emoji()")}# TODO |
50 | 50 | if (!(smilie_dictionary == "emoticons" | smilie_dictionary == "wikipedia")) {stop("'smilie_dictionary' must be 'emoticons' or 'wikipedia'")} |
51 | 51 | if (!is.character(rpnl)) {stop("'rpnl' must be a character string")} |
52 | 52 | if (!is.logical(verbose)) {stop("'verbose' must be either TRUE or FALSE")} |
53 | 53 |
|
| 54 | + # accept .txt or .zip (containing one or more .txt) |
| 55 | + if (!file.exists(path)) stop("'path' must be a valid path to a .txt or .zip file with a WhatsApp chat export") |
| 56 | + if (grepl("\\.zip$", path, ignore.case = TRUE)) { |
| 57 | + z <- utils::unzip(path, list = TRUE) |
| 58 | + txt <- z$Name[grepl("\\.txt$", z$Name, ignore.case = TRUE)] |
| 59 | + if (!length(txt)) stop("No .txt found inside the .zip export.") |
| 60 | + tmpdir <- file.path(tempdir(), "whatsr_zip") |
| 61 | + utils::unzip(path, files = txt, exdir = tmpdir, overwrite = TRUE) |
| 62 | + # If multiple txts exist, choose the largest by size (usually the chat) |
| 63 | + files <- file.path(tmpdir, txt) |
| 64 | + sizes <- file.info(files)$size |
| 65 | + path <- files[which.max(sizes)] |
| 66 | + if (verbose) cat(sprintf("Detected chat log file: %s\n", basename(path))) |
| 67 | + } |
| 68 | + |
54 | 69 | # Importing raw chat file |
55 | 70 | RawChat <- readChar(path, file.info(path)$size) |
56 | 71 |
|
|
0 commit comments