Skip to content

Commit 49d269c

Browse files
committed
further updated create_chatlog() to create correct testing chats
1 parent f20e8f9 commit 49d269c

File tree

3 files changed

+50
-27
lines changed

3 files changed

+50
-27
lines changed

R/create_chatlog.R

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -205,32 +205,53 @@ create_chatlog <- function(n_messages = 250,
205205

206206
#### Formatting Timestamps ####
207207

208+
# TODO: Fix the German ampm thing!
209+
208210
if (language == "german") {
209211
if (os == "android") {
210212
if (time_format == "24h") {
213+
# German, Android, 24h
211214
ts <- strftime(ts, format = "%d.%m.%y, %H:%M - ", tz = "UTC")
212215
} else {
216+
# German, Android, AMPM
213217
ts <- strftime(ts, format = "%d.%m.%y, %I:%M %p - ", tz = "UTC")
214-
ts <- {m <- gregexpr("AM|PM", ts); regmatches(ts, m) <- lapply(regmatches(ts, m), \(x) sapply(x, \(y) sample(if(y == "AM") c("morgens","vorm.","mittags") else c("nachm.","abends","nachts"),1)))}
218+
219+
# Fixing German translations for German, Android, AMPM
220+
ts <- {
221+
m <- gregexpr("AM|PM", ts)
222+
regmatches(ts, m) <- lapply(regmatches(ts, m), function(x)
223+
sapply(x, function(y)
224+
sample(if (y == "AM") c("morgens","vorm.","mittags")
225+
else c("nachm.","abends","nachts"), 1)
226+
)
227+
)
228+
ts
229+
}
215230
}
216231
} else {
217232
if (time_format == "24h") {
233+
# German, IOS, 24h
218234
ts <- strftime(ts, format = "[%d.%m.%y, %H:%M:%S] ", tz = "UTC")
219235
} else {
236+
# German, IOS, AMPM
220237
ts <- strftime(ts, format = "[%m/%d/%y, %I:%M:%S %p] ", tz = "UTC")
221238
}
222239
}
223240
} else {
224241
if (os == "android") {
225242
if (time_format == "24h") {
243+
# English, Android, 24h
226244
ts <- strftime(ts, format = "%m/%d/%y, %H:%M - ", tz = "UTC")
227245
} else {
246+
# English, Android, AMPM
228247
ts <- strftime(ts, format = "%m/%d/%y, %I:%M %p - ", tz = "UTC")
229248
}
230249
} else {
231250
if (time_format == "24h") {
251+
# English, IOS, 24h
232252
ts <- strftime(ts, format = "[%m/%d/%y, %H:%M:%S] ", tz = "UTC")
233253
} else {
254+
# English, IOS, AMPM
234255
ts <- strftime(ts, format = "[%m/%d/%y, %I:%M:%S %p] ", tz = "UTC")
235256
}
236257
}
@@ -335,6 +356,7 @@ create_chatlog <- function(n_messages = 250,
335356
WAStrings[1] <- gsub("(","",WAStrings[1],fixed = TRUE)
336357
WAStrings[1] <- gsub(")","",WAStrings[1],fixed = TRUE)
337358
WAStrings[1] <- sample(unlist(strsplit(WAStrings[1],"|",fixed = TRUE)),1)
359+
if (WAStrings[1] == "Nachrichten und Anrufe sind Ende-zu-Ende-verschlüsselt. Nur Personen in diesem Chat können sie lesen, anhören oder teilen. Mehru00A0erfahren") {WAStrings[1] <- "Nachrichten und Anrufe sind Ende-zu-Ende-verschlüsselt. Nur Personen in diesem Chat können sie lesen, anhören oder teilen. Mehr\u00A0erfahren"}
338360
Messages[1] <- WAStrings[1]
339361

340362
WAStrings[2] <- gsub("(","",WAStrings[2],fixed = TRUE)
@@ -414,8 +436,6 @@ create_chatlog <- function(n_messages = 250,
414436
WAStrings[2] <- gsub(")","",WAStrings[2],fixed = TRUE)
415437
WAStrings[2] <- sample(unlist(strsplit(WAStrings[2],"|",fixed = TRUE)),1)
416438

417-
#
418-
419439
WAStrings[c(4)] <- sample(c(paste(c("Bild","Audio","Video","Videonachricht","GIF","Sticker"),"weggelassen"),"Kontaktkarte ausgelassen"),1)
420440

421441
WAStrings[7] <- gsub("(","",WAStrings[7],fixed = TRUE)
@@ -434,8 +454,6 @@ create_chatlog <- function(n_messages = 250,
434454
WAStrings[21] <- gsub(")","",WAStrings[21],fixed = TRUE)
435455
WAStrings[21] <- sample(unlist(strsplit(WAStrings[21],"|",fixed = TRUE)),1)
436456

437-
#
438-
439457
WAStrings[22] <- gsub("(","",WAStrings[22],fixed = TRUE)
440458
WAStrings[22] <- gsub(")","",WAStrings[22],fixed = TRUE)
441459
WAStrings[22] <- sample(unlist(strsplit(WAStrings[22],"|",fixed = TRUE)),1)
@@ -460,10 +478,9 @@ create_chatlog <- function(n_messages = 250,
460478
WAStrings[33] <- gsub("(?:","",WAStrings[33],fixed = TRUE)
461479
WAStrings[33] <- gsub(")?","",WAStrings[33],fixed = TRUE)
462480

463-
WAStrings[37] <- gsub("\\s+"," ",WAStrings[37],fixed = TRUE)
464-
WAStrings[37] <- gsub("start_newline","\n",WAStrings[37],fixed = TRUE)
481+
WAStrings[37] <- "Meta\xC2\xA0AI ist ein optionaler Dienst von Meta, der KI-Modelle verwendet, um Antworten bereitzustellen. Teile keine Informationen, insbesondere nicht zu sensiblen Themen, über Dritte oder dich selbst, von denen du nicht möchtest, dass die KI sie speichert und verwendet. Meta teilt Informationen mit ausgewählten Partnern, damit Meta\xC2\xA0AI relevante Antworten liefern kann. Deine Interaktionen mit KIs werden nicht verwendet, um die KI bei Meta zu verbessern. Erfahre mehr über die Meta-Datenschutzrichtlinie und deine Rechte.\n\nDeine Nutzung von WhatsApp unterliegt der Datenschutzrichtlinie von WhatsApp. Durch die Nutzung von Meta\xC2\xA0AI stimmst du den KI-Nutzungsbedingungen von Meta zu. *Nutzungsbedingungen und Richtlinien ansehen*"
465482

466-
WAStrings[38] <- paste0("Nur Nachrichten, die @Meta", "\u00A0", "AI erwähnen oder die Personen mit Meta", "\u00A0", "AI teilen, können von Meta gelesen werden.")
483+
WAStrings[38] <- "Nur Nachrichten, die @Meta AI erwähnen oder die Personen mit Meta AI teilen, können von Meta gelesen werden. Meta kann keine anderen Nachrichten in diesem Chat lesen, da deine persönlichen Nachrichten Ende-zu-Ende-verschlüsselt bleiben.\n\nNachrichten werden von einer KI generiert. Einige können falsch oder unangemessen sein."
467484

468485
WAStrings[39] <- gsub(" .*? "," 7 Tage ",WAStrings[39],fixed = TRUE)
469486
WAStrings[39] <- gsub(".*? ","Bob ",WAStrings[39],fixed = TRUE)
@@ -476,11 +493,12 @@ create_chatlog <- function(n_messages = 250,
476493

477494
WAStrings[41] <- gsub(".+?","Bob",WAStrings[41],fixed = TRUE)
478495

479-
WAStrings[42] <- gsub(".+?","Bob",WAStrings[42],fixed = TRUE)
496+
WAStrings[42] <- gsub(".*?","Bob",WAStrings[42],fixed = TRUE)
480497

481498
WAStrings[43] <- gsub("(?:","",WAStrings[43],fixed = TRUE)
482499
WAStrings[43] <- gsub(")?","",WAStrings[43],fixed = TRUE)
483500
WAStrings[43] <- gsub(".*?","Bob",WAStrings[43],fixed = TRUE)
501+
WAStrings[43] <- gsub("\\*","*",WAStrings[43],fixed = TRUE)
484502

485503
WAStrings[44] <- gsub(".*?","Bob",WAStrings[44],fixed = TRUE)
486504

@@ -654,6 +672,7 @@ create_chatlog <- function(n_messages = 250,
654672

655673
WAStrings[42] <- gsub(".*?","Bob",WAStrings[42],fixed = TRUE)
656674

675+
# TODO: FIX THIS!
657676
WAStrings[43] <- gsub("(?:","",WAStrings[43],fixed = TRUE)
658677
WAStrings[43] <- gsub(")?","",WAStrings[43],fixed = TRUE)
659678
WAStrings[43] <- gsub(".*?","Bob",WAStrings[43],fixed = TRUE)
@@ -861,15 +880,19 @@ create_chatlog <- function(n_messages = 250,
861880

862881

863882
#### Pasting timestamps, names and messages together, based on OS structure
864-
# TODO: CONTINUE HERE
865883

866884
if (os == "ios") {
867885

886+
# IOS
887+
888+
# removing non-system-message lines
889+
sm_rows <- sm_rows[1:(length(sm_rows) - 4)]
890+
868891
# system messages with names
869-
Messages[sm_rows][c(1:4, 14:17, 19)] <- paste0(ts[sm_rows][c(1:4, 14:17, 19)], Names[sm_rows][c(1:4, 14:17, 19)], Messages[sm_rows][c(1:4, 14:17, 19)])
892+
Messages[sm_rows][c(1:46)] <- paste0(ts[sm_rows][c(1:46)], Names[sm_rows][c(1:46)], Messages[sm_rows][c(1:46)])
870893

871894
# system messages without names
872-
Messages[sm_rows][c(5:13,18,20)] <- paste0(ts[sm_rows][c(5:13,18,20)], Messages[sm_rows][c(5:13,18,20)])
895+
#Messages[sm_rows][c(5:13,18,20)] <- paste0(ts[sm_rows][c(5:13,18,20)], Messages[sm_rows][c(5:13,18,20)])
873896

874897
# other messages (with names)
875898
Messages[-c(1, sm_rows)] <- paste0(ts[-c(1, sm_rows)], Names[c(-c(1, sm_rows))], Messages[c(-c(1, sm_rows))])
@@ -879,11 +902,16 @@ create_chatlog <- function(n_messages = 250,
879902

880903
} else {
881904

905+
# ANDROID
906+
907+
# removing non-system-message lines
908+
sm_rows <- sm_rows[1:(length(sm_rows) - 4)]
909+
882910
# system messages with names
883-
Messages[sm_rows][c(1:4, 15:17, 19)] <- paste0(ts[sm_rows][c(1:4, 15:17, 19)], Names[sm_rows][c(1:4, 15:17, 19)], Messages[sm_rows][c(1:4, 15:17, 19)])
911+
Messages[sm_rows][c(1:4, 14,15,16,18, 20:26,29,31,32)] <- paste0(ts[sm_rows][c(1:4, 14,15,16,18, 20:26,29,31,32)], Names[sm_rows][c(1:4, 14,15,16,18, 20:26,29,31,32)], Messages[sm_rows][c(1:4, 14,15,16,18, 20:26,29,31,32)])
884912

885913
# system messages without names
886-
Messages[sm_rows][c(5:14,18,20)] <- paste0(ts[sm_rows][c(5:14,18,20)], Messages[sm_rows][c(5:14,18,20)])
914+
Messages[sm_rows][c(5:13,17,19,27,28,30,33:46)] <- paste0(ts[sm_rows][c(5:13,17,19,27,28,30,33:46)], Messages[sm_rows][c(5:13,17,19,27,28,30,33:46)])
887915

888916
# other messages (with names)
889917
Messages[-c(1, sm_rows)] <- paste0(ts[-c(1, sm_rows)], Names[c(-c(1, sm_rows))], Messages[c(-c(1, sm_rows))])

R/parse_chat.R

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,7 @@ parse_chat <- function(path,
5858
if (verbose) {cat("Imported raw chat file \U2713 \n")}
5959

6060
# Regex that detects 24h/ampm, american date format, european date format and all combinations for ios and android
61-
62-
# TODO: This is the current best working version
63-
# TimeRegex_android <- "(?!^)(?=((\\d{2}\\.\\d{2}\\.\\d{2})|(\\d{1,2}/\\d{1,2}/\\d{2,4})),\\s\\d{1,2}:\\d{2}(?:\\s*\\p{Zs}*\\s*(AM|PM))?\\s-)"
64-
# TimeRegex_ios <- c("(?!^)(?=\\[((\\d{2}\\.\\d{2}\\.\\d{2})|(\\d{1,2}\\/\\d{1,2}\\/\\d{2,4})),\\s\\d{1,2}\\:\\d{2}((\\:\\d{2}\\s(?i:(pm|am)))|(\\s(?i:(pm|am)))|(\\:\\d{2}\\])|(\\:\\d{2})|(\\s))\\])")
65-
66-
# TODO: This is an alternative version that doesnt work as well
67-
#TimeRegex_android <- c("(?!^)(?=((\\d{2}\\.\\d{2}\\.\\d{2})|(\\d{1,2}\\/\\d{1,2}\\/\\d{2,4})),\\s\\d{2}\\:\\d{2}((\\s\\-)|(\\s(?i:(am|pm))\\s\\-)))")
68-
#TimeRegex_ios <- TimeRegex_ios <- "(?!^)(?=\\[((\\d{2}\\.\\d{2}\\.\\d{2})|(\\d{1,2}[\\/\\-]\\d{1,2}[\\/\\-]\\d{2,4})),\\s\\d{1,2}\\:\\d{2}((\\:\\d{2}\\s(?i:(pm|am)))|(\\s(?i:(pm|am)))|(\\:\\d{2}\\])|(\\:\\d{2})|(\\s))\\])"
69-
70-
# TODO: This is the new version, also accounting for German AM/PM translations
61+
# This is the new version, also accounting for German AM/PM translations [see GitHub history for older versions]
7162
TimeRegex_android <- "(?!^)(?=((\\d{2}\\.\\d{2}\\.\\d{2})|(\\d{1,2}/\\d{1,2}/\\d{2,4})),\\s\\d{1,2}:\\d{2}(?:\\s*\\p{Zs}*\\s*(?i:(AM|PM|morgens|vorm\\.|mittags|nachm\\.|abends|nachts)))?\\s-)"
7263
TimeRegex_ios <- "(?!^)(?=\\[((\\d{2}\\.\\d{2}\\.\\d{2})|(\\d{1,2}\\/\\d{1,2}\\/\\d{2,4})),\\s\\d{1,2}:\\d{2}((\\:\\d{2}\\s(?i:(pm|am|morgens|vorm\\.|mittags|nachm\\.|abends|nachts)))|(\\s(?i:(pm|am|morgens|vorm\\.|mittags|nachm\\.|abends|nachts)))|(\\:\\d{2}\\])|(\\:\\d{2})|(\\s))\\])"
7364

@@ -97,7 +88,7 @@ parse_chat <- function(path,
9788
TimeRegex <- TimeRegex_android
9889
} else if (android_stamps == ios_stamps) {
9990

100-
cat("Operating System could not be detected automatically, please enter either 'ios' or 'android' without quatation marks and press enter")
91+
cat("Operating System could not be detected automatically, please enter either 'ios' or 'android' without quotation marks and press enter")
10192
os <- readline(prompt = "Enter operating system: ")
10293

10394
if (os == "android") {
@@ -135,6 +126,8 @@ parse_chat <- function(path,
135126
fileEncoding = "UTF-8"
136127
)
137128

129+
# TODO: something goes wrong here
130+
138131
# trying to auto-detect language
139132
if (language == "auto") {
140133

@@ -313,6 +306,8 @@ parse_chat <- function(path,
313306
NoAdminNow
314307
)
315308

309+
# TODO: This is where the last testing chat fails!
310+
316311
# checking whether a WhatsApp message was parsed into the sender column
317312
WAMessagePresent <- unlist(stri_extract_all_regex(str = ParsedChat$Sender, pattern = paste(WAStrings, collapse = "|")))
318313
ParsedChat$SystemMessage[!is.na(WAMessagePresent)] <- WAMessagePresent[!is.na(WAMessagePresent)]
@@ -650,7 +645,7 @@ parse_chat <- function(path,
650645
Emoji = I(Emoji),
651646
EmojiDescriptions = I(EmojiDescriptions),
652647
Smilies = I(Smilies),
653-
SystemMessage = SystemMessage,
648+
SystemMessage = SystemMessage, # Why is this NULL?
654649
stringsAsFactors = FALSE
655650
)
656651

0 commit comments

Comments
 (0)