Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3406,6 +3406,17 @@
"default": "./dist/experimental/llms/chrome_ai.cjs"
}
},
"./chat_loaders/whatsapp": {
"input": "./src/chat_loaders/whatsapp.ts",
"import": {
"types": "./dist/chat_loaders/whatsapp.d.ts",
"default": "./dist/chat_loaders/whatsapp.js"
},
"require": {
"types": "./dist/chat_loaders/whatsapp.d.cts",
"default": "./dist/chat_loaders/whatsapp.cjs"
}
},
"./experimental/tools/pyinterpreter": {
"input": "./src/experimental/tools/pyinterpreter.ts",
"import": {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[8/15/23, 9:12:33 AM] Dr. Feather: ‎Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.
[8/15/23, 9:12:43 AM] Dr. Feather: I spotted a rare Hyacinth Macaw yesterday in the Amazon Rainforest. Such a magnificent creature!
‎[8/15/23, 9:12:48 AM] Dr. Feather: ‎image omitted
[8/15/23, 9:13:15 AM] Jungle Jane: That's stunning! Were you able to observe its behavior?
‎[8/15/23, 9:13:23 AM] Dr. Feather: ‎image omitted
[8/15/23, 9:14:02 AM] Dr. Feather: Yes, it seemed quite social with other macaws. They're known for their playful nature.
[8/15/23, 9:14:15 AM] Jungle Jane: How's the research going on parrot communication?
‎[8/15/23, 9:14:30 AM] Dr. Feather: ‎image omitted
[8/15/23, 9:14:50 AM] Dr. Feather: It's progressing well. We're learning so much about how they use sound and color to communicate.
[8/15/23, 9:15:10 AM] Jungle Jane: That's fascinating! Can't wait to read your paper on it.
[8/15/23, 9:15:20 AM] Dr. Feather: Thank you! I'll send you a draft soon.
[8/15/23, 9:25:16 PM] Jungle Jane: Looking forward to it! Keep up the great work.

22 changes: 22 additions & 0 deletions libs/langchain-community/src/chat_loaders/tests/whatsapp.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import * as url from "node:url";
import * as path from "node:path";
import { test, expect } from "@jest/globals";
import { WhatsAppChatLoader } from "../whatsapp.js";

// Loads the bundled WhatsApp export fixture and checks that message text,
// sender and timestamp survive parsing, and that placeholder lines are dropped.
test("WhatsAppChatLoader parses WhatsApp export preserving timestamps and senders", async () => {
  const filePath = path.resolve(
    path.dirname(url.fileURLToPath(import.meta.url)),
    "./example_data/whatsapp/whatsapp_chat.txt"
  );

  const loader = new WhatsAppChatLoader(filePath);
  const sessions = await loader.load();
  expect(sessions.length).toBe(1);
  const { messages } = sessions[0];
  expect(messages.length).toBeGreaterThan(0);

  // Mirror Python assertion: content contains the macaw sentence
  const first = messages[0];
  expect(first.text).toContain(
    "I spotted a rare Hyacinth Macaw yesterday in the Amazon Rainforest. Such a magnificent creature!"
  );

  // The test title promises senders and timestamps are preserved, so pin them
  // for the first real message of the fixture.
  expect(first.name).toBe("Dr. Feather");
  expect(first.additional_kwargs).toMatchObject({
    sender: "Dr. Feather",
    events: [{ message_time: "8/15/23, 9:12:43 AM" }],
  });

  // Placeholder lines ("image omitted") must never surface as messages.
  for (const message of messages) {
    expect(message.text).not.toContain("image omitted");
  }
});
126 changes: 126 additions & 0 deletions libs/langchain-community/src/chat_loaders/whatsapp.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import type { readFile as ReadFileT } from "node:fs/promises";
import { BaseMessage, HumanMessage } from "@langchain/core/messages";
import { getEnv } from "@langchain/core/utils/env";

/**
 * Result of loading one chat export: the list of messages that were parsed
 * out of it.
 */
export type ChatSession = {
/** Messages that make up the session. */
messages: BaseMessage[];
// Reserved for future parity with Python ChatSession
// functions?: Array<Record<string, unknown>>;
};

/**
 * Loader for WhatsApp chat export .txt files (without media).
 *
 * Parity with Python implementation:
 * - Single regex with optional brackets and leading LRM
 * - Slash-separated dates, 12-hour times with seconds and AM/PM
 * - Multi-line messages joined with a single space
 * - Exact ignore list and LRM-tolerant matching
 *
 * The source may be given either as a file path (read through
 * `node:fs/promises`) or as a `Blob` (read through `Blob.text()`).
 * Both LF and CRLF line endings are accepted.
 */
export class WhatsAppChatLoader {
  /**
   * Placeholder texts that WhatsApp writes instead of real content. A message
   * whose text starts with one of these (optionally preceded by LRM marks,
   * case-insensitive) is dropped from the result.
   */
  private static readonly IGNORED_TEXTS: readonly string[] = [
    "This message was deleted",
    "<Media omitted>",
    "image omitted",
    "Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.",
  ];

  /**
   * Matches the ignore list at the START of a message text. The `^` anchor is
   * deliberate: without it, an ordinary message that merely mentions one of
   * the phrases somewhere in its body would be discarded.
   */
  private static readonly IGNORE_RE: RegExp = new RegExp(
    `^(?:${WhatsAppChatLoader.IGNORED_TEXTS.map(
      (s) => `\\u200E*${s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`
    ).join("|")})`,
    "i"
  );

  /**
   * Matches one exported message line. Capture groups:
   * 1 = timestamp, 2 = sender, 3 = message text.
   * No `g`/`y` flag, so the shared instance carries no `lastIndex` state.
   */
  private static readonly MESSAGE_LINE_RE: RegExp = new RegExp(
    String.raw`^\u200E*\[?(\d{1,2}\/\d{1,2}\/\d{2,4}, \d{1,2}:\d{2}:\d{2} (?:AM|PM))\]?[ \u200E]*([^:]+): (.+)$`,
    "i"
  );

  /**
   * @param filePathOrBlob Path of the export file, or a `Blob` holding its
   * contents.
   */
  constructor(public filePathOrBlob: string | Blob) {}

  /**
   * Eagerly collects everything produced by {@link lazyLoad}.
   *
   * @returns All parsed chat sessions.
   * @throws Error if the underlying file or blob cannot be read.
   */
  async load(): Promise<ChatSession[]> {
    const sessions: ChatSession[] = [];
    for await (const session of this.lazyLoad()) {
      sessions.push(session);
    }
    return sessions;
  }

  /**
   * Reads the whole source and yields a single session parsed from it.
   *
   * @throws Error if the underlying file or blob cannot be read.
   */
  async *lazyLoad(): AsyncGenerator<ChatSession> {
    const text = await this.readAllText();
    yield this.parse(text);
  }

  /**
   * Turns the raw export text into a chat session.
   *
   * Lines that do not start a new message are treated as continuations of the
   * previous one and appended to it with a single space. Messages whose text
   * starts with an entry of the ignore list are skipped; chunks that cannot
   * be parsed are reported through `console.debug` and skipped.
   *
   * @param raw Full text of the export.
   * @returns The session holding one `HumanMessage` per kept message, with
   * the sender stored in `name` / `additional_kwargs.sender` and the raw
   * timestamp string in `additional_kwargs.events[0].message_time`.
   */
  protected parse(raw: string): ChatSession {
    const messages: BaseMessage[] = [];

    // Split messages by newlines but keep multi-line messages grouped.
    // `\r?\n` matters: `.` never matches `\r`, so with a plain "\n" split no
    // line of a CRLF file would be recognised as a message start.
    const chatLines: string[] = [];
    let currentMessage = "";
    for (const line of raw.split(/\r?\n/)) {
      if (WhatsAppChatLoader.MESSAGE_LINE_RE.test(line)) {
        if (currentMessage) chatLines.push(currentMessage);
        currentMessage = line;
      } else {
        currentMessage += ` ${line.trim()}`;
      }
    }
    if (currentMessage) chatLines.push(currentMessage);

    for (const line of chatLines) {
      // trim() drops the padding added by blank continuation lines.
      const result = WhatsAppChatLoader.MESSAGE_LINE_RE.exec(line.trim());
      if (!result) {
        // Mirror Python's debug logging for unparsable lines
        console.debug(`Could not parse line: ${line}`);
        continue;
      }

      const [, timestamp, sender, text] = result;
      if (WhatsAppChatLoader.IGNORE_RE.test(text.trim())) {
        continue;
      }

      messages.push(
        new HumanMessage({
          content: text,
          name: sender,
          additional_kwargs: {
            sender,
            events: [{ message_time: timestamp }],
          },
        })
      );
    }

    return { messages };
  }

  /**
   * Reads the complete source as a UTF-8 string.
   *
   * @throws Error "Failed to read file" / "Failed to read blob" when the read
   * fails; the underlying error is written to `console.error` first.
   */
  protected async readAllText(): Promise<string> {
    if (typeof this.filePathOrBlob === "string") {
      const { readFile } = await WhatsAppChatLoader.imports();
      try {
        return await readFile(this.filePathOrBlob, "utf8");
      } catch (e) {
        console.error(e);
        throw new Error("Failed to read file");
      }
    } else {
      try {
        return await this.filePathOrBlob.text();
      } catch (e) {
        console.error(e);
        throw new Error("Failed to read blob");
      }
    }
  }

  /**
   * Dynamically imports `node:fs/promises` so that the module itself can be
   * loaded without it; the import only happens when a file path is read.
   *
   * @returns The `readFile` function of `node:fs/promises`.
   * @throws Error if the module cannot be imported.
   */
  static async imports(): Promise<{
    readFile: typeof ReadFileT;
  }> {
    try {
      const { readFile } = await import("node:fs/promises");
      return { readFile };
    } catch (e) {
      console.error(e);
      throw new Error(
        `Failed to load fs/promises. WhatsAppChatLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'.`
      );
    }
  }
}
1 change: 1 addition & 0 deletions libs/langchain-community/src/load/import_map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,5 +247,6 @@ export * as experimental__callbacks__handlers__datadog from "../experimental/cal
export * as experimental__graph_transformers__llm from "../experimental/graph_transformers/llm.js";
export * as experimental__multimodal_embeddings__googlevertexai from "../experimental/multimodal_embeddings/googlevertexai.js";
export * as experimental__llms__chrome_ai from "../experimental/llms/chrome_ai.js";
export * as chat_loaders__whatsapp from "../chat_loaders/whatsapp.js";
export * as experimental__tools__pyinterpreter from "../experimental/tools/pyinterpreter.js";
export * as chains__graph_qa__cypher from "../chains/graph_qa/cypher.js";