Skip to content

Commit 2fbc024

Browse files
committed
fix(notion-translate): split heading rich text and extend limit tests
1 parent 324e292 commit 2fbc024

File tree

2 files changed

+208
-32
lines changed

2 files changed

+208
-32
lines changed

scripts/notion-translate/markdownToNotion.test.ts

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,174 @@ describe("markdownToNotion", () => {
2222
const scriptModule = await import("./markdownToNotion");
2323
expect(typeof scriptModule).toBe("object");
2424
});
25+
26+
describe("markdownToNotionBlocks – Notion 2000-char rich_text limit", () => {
  // Minimal view of a rich_text entry: these tests only ever read its text content.
  type RichTextItem = { text: { content: string } };

  // Keeps the blocks that carry the given block-type key (e.g. "quote").
  const withKey = <T extends object>(blocks: T[], key: string): T[] =>
    blocks.filter((candidate) => key in candidate);

  // Reads the rich_text array stored under a block's type key.
  const richTextOf = (block: unknown, key: string): RichTextItem[] => {
    const payload = (block as Record<string, unknown>)[key];
    return (payload as { rich_text: RichTextItem[] }).rich_text;
  };

  // Concatenates the text content of the given items, in order.
  const joinContent = (items: RichTextItem[]): string =>
    items.map((entry) => entry.text.content).join("");

  // Asserts that no single rich_text item is longer than 2000 characters.
  const expectWithinLimit = (items: RichTextItem[]): void => {
    for (const entry of items) {
      expect(entry.text.content.length).toBeLessThanOrEqual(2000);
    }
  };

  it("should split a blockquote longer than 2000 chars into multiple rich_text items", async () => {
    const { markdownToNotionBlocks } = await import("./markdownToNotion");

    // 2844 characters: roughly the size of the quote behind the real failure
    const payload = "A".repeat(2844);
    const converted = await markdownToNotionBlocks(`> ${payload}`);

    const quotes = withKey(converted, "quote");
    expect(quotes.length).toBeGreaterThanOrEqual(1);

    // Length limit holds for every item of every quote block
    for (const quote of quotes) {
      expectWithinLimit(richTextOf(quote, "quote"));
    }

    // Nothing is lost or reordered: the pieces re-join into the input text
    const everyItem = quotes.flatMap((quote) => richTextOf(quote, "quote"));
    expect(joinContent(everyItem)).toBe(payload);
  });

  it("should keep a short blockquote as a single rich_text item", async () => {
    const { markdownToNotionBlocks } = await import("./markdownToNotion");

    const converted = await markdownToNotionBlocks("> Short quote text");

    const quotes = withKey(converted, "quote");
    expect(quotes.length).toBe(1);
    expect(richTextOf(quotes[0], "quote").length).toBe(1);
  });

  it("should split a paragraph longer than 2000 chars into multiple rich_text items", async () => {
    const { markdownToNotionBlocks } = await import("./markdownToNotion");

    const payload = "B".repeat(2500);
    const converted = await markdownToNotionBlocks(payload);

    const paragraphs = withKey(converted, "paragraph");
    expect(paragraphs.length).toBeGreaterThanOrEqual(1);

    for (const paragraph of paragraphs) {
      expectWithinLimit(richTextOf(paragraph, "paragraph"));
    }

    const everyItem = paragraphs.flatMap((paragraph) =>
      richTextOf(paragraph, "paragraph")
    );
    expect(joinContent(everyItem)).toBe(payload);
  });

  it("should prefer splitting at word boundaries for long natural-language text", async () => {
    const { markdownToNotionBlocks } = await import("./markdownToNotion");

    const sentence = "This is a natural language sentence for split testing.";
    const payload = `${sentence} `.repeat(80).trim();
    expect(payload.length).toBeGreaterThan(1900);

    const converted = await markdownToNotionBlocks(payload);
    const paragraphs = withKey(converted, "paragraph");
    expect(paragraphs.length).toBe(1);

    const items = richTextOf(paragraphs[0], "paragraph");
    expect(items.length).toBeGreaterThan(1);
    expectWithinLimit(items);

    // Every piece except the last must end right after a space
    for (const entry of items.slice(0, -1)) {
      expect(entry.text.content.endsWith(" ")).toBe(true);
    }

    expect(joinContent(items)).toBe(payload);
  });

  it("should split a list item longer than 2000 chars into multiple rich_text items", async () => {
    const { markdownToNotionBlocks } = await import("./markdownToNotion");

    const payload = "List item content ".repeat(150).trim();
    expect(payload.length).toBeGreaterThan(1900);

    const converted = await markdownToNotionBlocks(`- ${payload}`);
    const listItems = withKey(converted, "bulleted_list_item");
    expect(listItems.length).toBe(1);

    const items = richTextOf(listItems[0], "bulleted_list_item");
    expect(items.length).toBeGreaterThan(1);
    expectWithinLimit(items);

    expect(joinContent(items)).toBe(payload);
  });

  it("should split a heading longer than 2000 chars into multiple rich_text items", async () => {
    const { markdownToNotionBlocks } = await import("./markdownToNotion");

    const payload = "Heading text ".repeat(220).trim();
    expect(payload.length).toBeGreaterThan(1900);

    const converted = await markdownToNotionBlocks(`# ${payload}`);
    const headings = withKey(converted, "heading_1");
    expect(headings.length).toBe(1);

    const items = richTextOf(headings[0], "heading_1");
    expect(items.length).toBeGreaterThan(1);
    expectWithinLimit(items);

    expect(joinContent(items)).toBe(payload);
  });
});
25195
});

scripts/notion-translate/markdownToNotion.ts

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818

1919
const EMPTY_TRANSLATED_CONTENT_ERROR =
2020
"Translated content is empty - cannot create page. Please check if the English source has content.";
21+
const MAX_RICH_TEXT_LENGTH = 1900; // Notion API limit is 2000; use 1900 to be safe
2122

2223
// Type definition for page results from dataSources.query
2324
interface NotionPageResult {
@@ -130,14 +131,7 @@ export async function markdownToNotionBlocks(
130131

131132
notionBlocks.push({
132133
paragraph: {
133-
rich_text: [
134-
{
135-
type: "text",
136-
text: {
137-
content: paragraphText,
138-
},
139-
},
140-
],
134+
rich_text: splitIntoRichTextItems(paragraphText),
141135
},
142136
});
143137
break;
@@ -156,14 +150,7 @@ export async function markdownToNotionBlocks(
156150
notionBlocks.push({
157151
type: blockType,
158152
[blockType]: {
159-
rich_text: [
160-
{
161-
type: "text",
162-
text: {
163-
content: item,
164-
},
165-
},
166-
],
153+
rich_text: splitIntoRichTextItems(item),
167154
},
168155
} as unknown as BlockObjectRequest);
169156
}
@@ -289,14 +276,7 @@ export async function markdownToNotionBlocks(
289276

290277
notionBlocks.push({
291278
quote: {
292-
rich_text: [
293-
{
294-
type: "text",
295-
text: {
296-
content: quoteText,
297-
},
298-
},
299-
],
279+
rich_text: splitIntoRichTextItems(quoteText),
300280
},
301281
});
302282
break;
@@ -378,6 +358,39 @@ function getTextFromNode(node: MarkdownNode | TextNode | unknown): string {
378358
return "";
379359
}
380360

361+
/**
 * Splits a string into an array of rich_text items whose `content` is never
 * longer than `maxLength` (as measured by `String.prototype.length`), so that
 * each item stays within Notion's 2000-character limit.
 *
 * Splitting rules, applied to each chunk in turn:
 * - Prefer cutting immediately after the last space that still fits, so the
 *   space stays at the end of the chunk and words are kept whole.
 * - Otherwise cut at exactly `maxLength`, backing off by one code unit when
 *   that cut would fall between the two halves of a UTF-16 surrogate pair.
 *
 * Concatenating the `content` of the returned items always reproduces `text`.
 * A string that already fits (including the empty string) yields one item.
 *
 * @param text - The plain text to distribute across rich_text items.
 * @param maxLength - Maximum length of one item's content. Defaults to
 *   `MAX_RICH_TEXT_LENGTH`. Must be an integer of at least 2 so a surrogate
 *   pair can always be kept intact while still making progress.
 * @returns The rich_text items, in order.
 * @throws RangeError when `maxLength` is not an integer of at least 2.
 */
function splitIntoRichTextItems(
  text: string,
  maxLength: number = MAX_RICH_TEXT_LENGTH
): Array<{ type: "text"; text: { content: string } }> {
  if (!Number.isInteger(maxLength) || maxLength < 2) {
    throw new RangeError(
      `maxLength must be an integer of at least 2, received ${maxLength}`
    );
  }

  if (text.length <= maxLength) {
    return [{ type: "text", text: { content: text } }];
  }

  const items: Array<{ type: "text"; text: { content: string } }> = [];
  let remaining = text;

  while (remaining.length > maxLength) {
    let splitIndex = maxLength;

    // Search from maxLength - 1 so that the chunk *including* the trailing
    // space is at most maxLength long (searching from maxLength could yield
    // a chunk of maxLength + 1).
    const spaceIndex = remaining.lastIndexOf(" ", maxLength - 1);
    if (spaceIndex > 0) {
      splitIndex = spaceIndex + 1;
    } else {
      // Hard split: make sure we do not separate a high surrogate from the
      // low surrogate that follows it, which would emit invalid text.
      const before = remaining.charCodeAt(maxLength - 1);
      const after = remaining.charCodeAt(maxLength);
      const endsOnHighSurrogate = before >= 0xd800 && before <= 0xdbff;
      const nextIsLowSurrogate = after >= 0xdc00 && after <= 0xdfff;
      if (endsOnHighSurrogate && nextIsLowSurrogate) {
        splitIndex = maxLength - 1;
      }
    }

    items.push({
      type: "text",
      text: { content: remaining.slice(0, splitIndex) },
    });
    remaining = remaining.slice(splitIndex);
  }

  // splitIndex is always smaller than remaining.length inside the loop, so a
  // non-empty tail is guaranteed to be left over here.
  items.push({ type: "text", text: { content: remaining } });

  return items;
}
393+
381394
/**
382395
* Creates a heading block with the specified level
383396
*/
@@ -393,14 +406,7 @@ function createHeadingBlock(
393406
return {
394407
type: headingType,
395408
[headingType]: {
396-
rich_text: [
397-
{
398-
type: "text",
399-
text: {
400-
content: text,
401-
},
402-
},
403-
],
409+
rich_text: splitIntoRichTextItems(text),
404410
},
405411
} as unknown as BlockObjectRequest;
406412
}

0 commit comments

Comments
 (0)