fix(notion-translate): split heading rich text and extend limit tests

luandro · luandro · commit 2fbc0246c82f · 2026-02-18T22:51:46.000-03:00
diff --git a/scripts/notion-translate/markdownToNotion.test.ts b/scripts/notion-translate/markdownToNotion.test.ts
@@ -22,4 +22,174 @@ describe("markdownToNotion", () => {
     const scriptModule = await import("./markdownToNotion");
     expect(typeof scriptModule).toBe("object");
   });
+
+  describe("markdownToNotionBlocks – Notion 2000-char rich_text limit", () => {
+    it("should split a blockquote longer than 2000 chars into multiple rich_text items", async () => {
+      const { markdownToNotionBlocks } = await import("./markdownToNotion");
+
+      // Build a blockquote whose text is exactly 2844 chars (replicating the real failure)
+      const longText = "A".repeat(2844);
+      const markdown = `> ${longText}`;
+
+      const blocks = await markdownToNotionBlocks(markdown);
+
+      const quoteBlocks = blocks.filter((b) => "quote" in b);
+      expect(quoteBlocks.length).toBeGreaterThanOrEqual(1);
+
+      // Every rich_text item in every quote block must be ≤ 2000 chars
+      for (const block of quoteBlocks) {
+        const richText = (
+          block as {
+            quote: { rich_text: Array<{ text: { content: string } }> };
+          }
+        ).quote.rich_text;
+        for (const item of richText) {
+          expect(item.text.content.length).toBeLessThanOrEqual(2000);
+        }
+      }
+
+      // The combined text should equal the original
+      const combined = quoteBlocks
+        .flatMap(
+          (b) =>
+            (
+              b as {
+                quote: { rich_text: Array<{ text: { content: string } }> };
+              }
+            ).quote.rich_text
+        )
+        .map((item) => item.text.content)
+        .join("");
+      expect(combined).toBe(longText);
+    });
+
+    it("should keep a short blockquote as a single rich_text item", async () => {
+      const { markdownToNotionBlocks } = await import("./markdownToNotion");
+
+      const markdown = "> Short quote text";
+      const blocks = await markdownToNotionBlocks(markdown);
+
+      const quoteBlocks = blocks.filter((b) => "quote" in b);
+      expect(quoteBlocks.length).toBe(1);
+      const richText = (quoteBlocks[0] as { quote: { rich_text: unknown[] } })
+        .quote.rich_text;
+      expect(richText.length).toBe(1);
+    });
+
+    it("should split a paragraph longer than 2000 chars into multiple rich_text items", async () => {
+      const { markdownToNotionBlocks } = await import("./markdownToNotion");
+
+      const longText = "B".repeat(2500);
+      const blocks = await markdownToNotionBlocks(longText);
+
+      const paragraphBlocks = blocks.filter((b) => "paragraph" in b);
+      expect(paragraphBlocks.length).toBeGreaterThanOrEqual(1);
+
+      for (const block of paragraphBlocks) {
+        const richText = (
+          block as {
+            paragraph: { rich_text: Array<{ text: { content: string } }> };
+          }
+        ).paragraph.rich_text;
+        for (const item of richText) {
+          expect(item.text.content.length).toBeLessThanOrEqual(2000);
+        }
+      }
+
+      const combined = paragraphBlocks
+        .flatMap(
+          (b) =>
+            (
+              b as {
+                paragraph: { rich_text: Array<{ text: { content: string } }> };
+              }
+            ).paragraph.rich_text
+        )
+        .map((item) => item.text.content)
+        .join("");
+      expect(combined).toBe(longText);
+    });
+
+    it("should prefer splitting at word boundaries for long natural-language text", async () => {
+      const { markdownToNotionBlocks } = await import("./markdownToNotion");
+
+      const sentence = "This is a natural language sentence for split testing.";
+      const longText = `${sentence} `.repeat(80).trim();
+      expect(longText.length).toBeGreaterThan(1900);
+
+      const blocks = await markdownToNotionBlocks(longText);
+      const paragraphBlocks = blocks.filter((b) => "paragraph" in b);
+      expect(paragraphBlocks.length).toBe(1);
+
+      const richText = (
+        paragraphBlocks[0] as {
+          paragraph: { rich_text: Array<{ text: { content: string } }> };
+        }
+      ).paragraph.rich_text;
+      expect(richText.length).toBeGreaterThan(1);
+
+      for (const item of richText) {
+        expect(item.text.content.length).toBeLessThanOrEqual(2000);
+      }
+
+      for (const item of richText.slice(0, -1)) {
+        expect(item.text.content.endsWith(" ")).toBe(true);
+      }
+
+      const combined = richText.map((item) => item.text.content).join("");
+      expect(combined).toBe(longText);
+    });
+
+    it("should split a list item longer than 2000 chars into multiple rich_text items", async () => {
+      const { markdownToNotionBlocks } = await import("./markdownToNotion");
+
+      const longItem = "List item content ".repeat(150).trim();
+      expect(longItem.length).toBeGreaterThan(1900);
+
+      const blocks = await markdownToNotionBlocks(`- ${longItem}`);
+      const listBlocks = blocks.filter((b) => "bulleted_list_item" in b);
+      expect(listBlocks.length).toBe(1);
+
+      const richText = (
+        listBlocks[0] as {
+          bulleted_list_item: {
+            rich_text: Array<{ text: { content: string } }>;
+          };
+        }
+      ).bulleted_list_item.rich_text;
+      expect(richText.length).toBeGreaterThan(1);
+
+      for (const item of richText) {
+        expect(item.text.content.length).toBeLessThanOrEqual(2000);
+      }
+
+      const combined = richText.map((item) => item.text.content).join("");
+      expect(combined).toBe(longItem);
+    });
+
+    it("should split a heading longer than 2000 chars into multiple rich_text items", async () => {
+      const { markdownToNotionBlocks } = await import("./markdownToNotion");
+
+      const longHeading = "Heading text ".repeat(220).trim();
+      expect(longHeading.length).toBeGreaterThan(1900);
+
+      const blocks = await markdownToNotionBlocks(`# ${longHeading}`);
+      const headingBlocks = blocks.filter((b) => "heading_1" in b);
+      expect(headingBlocks.length).toBe(1);
+
+      const richText = (
+        headingBlocks[0] as {
+          heading_1: { rich_text: Array<{ text: { content: string } }> };
+        }
+      ).heading_1.rich_text;
+      expect(richText.length).toBeGreaterThan(1);
+
+      for (const item of richText) {
+        expect(item.text.content.length).toBeLessThanOrEqual(2000);
+      }
+
+      const combined = richText.map((item) => item.text.content).join("");
+      expect(combined).toBe(longHeading);
+    });
+  });
 });
diff --git a/scripts/notion-translate/markdownToNotion.ts b/scripts/notion-translate/markdownToNotion.ts
@@ -18,6 +18,7 @@ import {
 
 const EMPTY_TRANSLATED_CONTENT_ERROR =
   "Translated content is empty - cannot create page. Please check if the English source has content.";
+const MAX_RICH_TEXT_LENGTH = 1900; // Notion API limit is 2000; use 1900 to be safe
 
 // Type definition for page results from dataSources.query
 interface NotionPageResult {
@@ -130,14 +131,7 @@ export async function markdownToNotionBlocks(
 
         notionBlocks.push({
           paragraph: {
-            rich_text: [
-              {
-                type: "text",
-                text: {
-                  content: paragraphText,
-                },
-              },
-            ],
+            rich_text: splitIntoRichTextItems(paragraphText),
           },
         });
         break;
@@ -156,14 +150,7 @@ export async function markdownToNotionBlocks(
           notionBlocks.push({
             type: blockType,
             [blockType]: {
-              rich_text: [
-                {
-                  type: "text",
-                  text: {
-                    content: item,
-                  },
-                },
-              ],
+              rich_text: splitIntoRichTextItems(item),
             },
           } as unknown as BlockObjectRequest);
         }
@@ -289,14 +276,7 @@ export async function markdownToNotionBlocks(
 
         notionBlocks.push({
           quote: {
-            rich_text: [
-              {
-                type: "text",
-                text: {
-                  content: quoteText,
-                },
-              },
-            ],
+            rich_text: splitIntoRichTextItems(quoteText),
           },
         });
         break;
@@ -378,6 +358,39 @@ function getTextFromNode(node: MarkdownNode | TextNode | unknown): string {
   return "";
 }
 
+/**
+ * Splits `text` into Notion rich_text items of at most MAX_RICH_TEXT_LENGTH
+ * characters each; concatenating the items' content reproduces `text` exactly.
+ * Splits after a space when one exists, otherwise at the length cap, and never
+ * between the two halves of a UTF-16 surrogate pair.
+ * @param text - Plain text for one block; may be empty.
+ * @returns One item when `text` fits the cap (also for ""), otherwise several.
+ */
+function splitIntoRichTextItems(
+  text: string
+): Array<{ type: "text"; text: { content: string } }> {
+  const items: Array<{ type: "text"; text: { content: string } }> = [];
+  let remaining = text;
+
+  while (remaining.length > MAX_RICH_TEXT_LENGTH) {
+    // Search from MAX - 1 so the chunk, trailing space included, fits the cap.
+    const spaceIndex = remaining.lastIndexOf(" ", MAX_RICH_TEXT_LENGTH - 1);
+    let splitIndex = spaceIndex > 0 ? spaceIndex + 1 : MAX_RICH_TEXT_LENGTH;
+    const lastUnit = remaining.charCodeAt(splitIndex - 1);
+    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+      splitIndex -= 1; // keep the high surrogate with its low half
+    }
+    items.push({
+      type: "text",
+      text: { content: remaining.substring(0, splitIndex) },
+    });
+    remaining = remaining.substring(splitIndex);
+  }
+
+  items.push({ type: "text", text: { content: remaining } });
+  return items;
+}
+
 /**
  * Creates a heading block with the specified level
  */
@@ -393,14 +406,7 @@ function createHeadingBlock(
   return {
     type: headingType,
     [headingType]: {
-      rich_text: [
-        {
-          type: "text",
-          text: {
-            content: text,
-          },
-        },
-      ],
+      rich_text: splitIntoRichTextItems(text),
     },
   } as unknown as BlockObjectRequest;
 }