Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
FROM node:lts-alpine AS build
FROM oven/bun:latest AS build

WORKDIR /app/

COPY . .

RUN corepack enable && corepack prepare pnpm@latest --activate
RUN --mount=type=cache,target=/root/.bun/install/cache \
bun install --frozen-lockfile && \
bun run build

RUN --mount=type=cache,target=/root/.local/share/pnpm \
pnpm install --prefer-offline && \
pnpm build

CMD [ "pnpm", "start" ]
CMD [ "bun", "run", "start" ]
EXPOSE 3000
39 changes: 30 additions & 9 deletions app/api/article/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { AppError, createNetworkError, createParseError } from "@/lib/errors";
import { createLogger } from "@/lib/logger";
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import { getTextDirection } from "@/lib/rtl";

const logger = createLogger('api:article');

Expand All @@ -22,6 +23,7 @@ const DiffbotArticleSchema = z.object({
publishedTime: z.string().optional().nullable(),
image: z.string().nullable().optional(),
htmlContent: z.string().optional(),
lang: z.string().optional().nullable(),
});

// Article schema for caching
Expand All @@ -35,6 +37,8 @@ const CachedArticleSchema = z.object({
publishedTime: z.string().optional().nullable(),
image: z.string().nullable().optional(),
htmlContent: z.string().optional(),
lang: z.string().optional().nullable(),
dir: z.enum(['rtl', 'ltr']).optional().nullable(),
});

type CachedArticle = z.infer<typeof CachedArticleSchema>;
Expand Down Expand Up @@ -216,6 +220,15 @@ async function fetchArticleWithSmryFast(
};
}

// Extract language from HTML
const htmlLang = dom.window.document.documentElement.getAttribute('lang') ||
dom.window.document.documentElement.getAttribute('xml:lang') ||
parsed.lang || // Readability may extract this
null;

// Detect text direction based on language or content analysis
const textDir = getTextDirection(htmlLang, parsed.textContent);

const articleCandidate: CachedArticle = {
title: parsed.title || dom.window.document.title || 'Untitled',
content: parsed.content,
Expand All @@ -232,6 +245,8 @@ async function fetchArticleWithSmryFast(
publishedTime: extractDateFromDom(dom.window.document) || null,
image: extractImageFromDom(dom.window.document) || null,
htmlContent: originalHtml, // Original page HTML
lang: htmlLang,
dir: textDir,
};

const validationResult = CachedArticleSchema.safeParse(articleCandidate);
Expand Down Expand Up @@ -314,6 +329,9 @@ async function fetchArticleWithDiffbotWrapper(

const validatedArticle = validationResult.data;

// Detect text direction based on language or content analysis
const textDir = getTextDirection(validatedArticle.lang, validatedArticle.text);

const article: CachedArticle = {
title: validatedArticle.title,
content: validatedArticle.html,
Expand All @@ -324,9 +342,11 @@ async function fetchArticleWithDiffbotWrapper(
publishedTime: validatedArticle.publishedTime,
image: validatedArticle.image,
htmlContent: validatedArticle.htmlContent,
lang: validatedArticle.lang,
dir: textDir,
};

logger.debug({ source, title: article.title, length: article.length }, 'Diffbot article parsed and validated');
logger.debug({ source, title: article.title, length: article.length, lang: article.lang, dir: article.dir }, 'Diffbot article parsed and validated');
return { article, cacheURL: urlWithSource };
} catch (error) {
logger.error({ source, error }, 'Article parsing exception');
Expand Down Expand Up @@ -432,8 +452,8 @@ export async function GET(request: NextRequest) {
article: {
title: article.title,
byline: article.byline || null,
dir: "",
lang: "",
dir: article.dir || getTextDirection(article.lang, article.textContent),
lang: article.lang || "",
content: article.content,
textContent: article.textContent,
length: article.length,
Expand Down Expand Up @@ -516,8 +536,8 @@ export async function GET(request: NextRequest) {
article: {
title: article.title,
byline: article.byline || null,
dir: "",
lang: "",
dir: article.dir || getTextDirection(article.lang, article.textContent),
lang: article.lang || "",
content: article.content,
textContent: article.textContent,
length: article.length,
Expand All @@ -539,8 +559,8 @@ export async function GET(request: NextRequest) {
article: {
title: validatedSavedArticle.title,
byline: validatedSavedArticle.byline || null,
dir: "",
lang: "",
dir: validatedSavedArticle.dir || getTextDirection(validatedSavedArticle.lang, validatedSavedArticle.textContent),
lang: validatedSavedArticle.lang || "",
content: validatedSavedArticle.content,
textContent: validatedSavedArticle.textContent,
length: validatedSavedArticle.length,
Expand Down Expand Up @@ -587,13 +607,14 @@ export async function GET(request: NextRequest) {
article: {
title: validatedArticle.title,
byline: validatedArticle.byline || null,
dir: "",
lang: "",
dir: validatedArticle.dir || getTextDirection(validatedArticle.lang, validatedArticle.textContent),
lang: validatedArticle.lang || "",
content: validatedArticle.content,
textContent: validatedArticle.textContent,
length: validatedArticle.length,
siteName: validatedArticle.siteName,
publishedTime: validatedArticle.publishedTime || null,
image: validatedArticle.image || null,
Comment on lines +610 to +617
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Search for ArticleResponseSchema definition
rg -n "ArticleResponseSchema" --type=ts -A 40

Repository: mrmps/SMRY

Length of output: 29677


🏁 Script executed:

#!/bin/bash
# Find ArticleSchema definition
rg -n "ArticleSchema\s*=" --type=ts -B 2 -A 20 | head -100

Repository: mrmps/SMRY

Length of output: 5544


Response object inconsistency: image field presence varies across paths.

The response at line 617 includes image: validatedArticle.image || null, but similar response constructions at lines 533-549 and 556-572 omit this field. While ArticleSchema defines image as optional, this inconsistency means clients will receive different response structures depending on which code path executes (cache hit, post-fetch validation, or error handler).

Add the image field to the response blocks at lines 533-549 and 556-572 for consistency.

🤖 Prompt for AI Agents
In app/api/article/route.ts around lines 533-549, 556-572 and 610-617, the
responses are inconsistent: the block at 610-617 includes image:
validatedArticle.image || null but the earlier response blocks (533-549 and
556-572) omit image; add image: validatedArticle.image || null to the response
objects in the 533-549 and 556-572 blocks so all code paths return the same
shape (use the same null fallback as the existing block) and keep field ordering
consistent with the other response properties.

htmlContent: validatedArticle.htmlContent,
},
status: "success",
Expand Down
28 changes: 19 additions & 9 deletions app/api/jina/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { fromError } from "zod-validation-error";
import { createLogger } from "@/lib/logger";
import { redis } from "@/lib/redis";
import { compress, decompress } from "@/lib/redis-compression";
import { getTextDirection } from "@/lib/rtl";

const logger = createLogger('api:jina');

Expand All @@ -18,6 +19,8 @@ const CachedArticleSchema = z.object({
byline: z.string().optional().nullable(),
publishedTime: z.string().optional().nullable(),
htmlContent: z.string().optional(), // Not available for jina.ai source
lang: z.string().optional().nullable(),
dir: z.enum(['rtl', 'ltr']).optional().nullable(),
});

/**
Expand Down Expand Up @@ -65,8 +68,8 @@ export async function GET(request: NextRequest) {
article: {
...article,
byline: article.byline || "",
dir: "",
lang: "",
dir: article.dir || getTextDirection(article.lang, article.textContent),
lang: article.lang || "",
publishedTime: article.publishedTime || null,
htmlContent: article.content, // Use markdown-converted HTML as htmlContent
},
Expand Down Expand Up @@ -158,18 +161,22 @@ export async function POST(request: NextRequest) {
]);
};

// Detect text direction for the incoming article
const articleDir = getTextDirection(null, article.textContent);
const articleWithDir = { ...article, dir: articleDir, lang: article.lang || null };
Comment on lines +164 to +166
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

article.lang doesn't exist on the input schema.

The JinaCacheUpdateSchema (defined in types/api.ts, lines 92-104) doesn't include a lang field. The expression article.lang || null on line 166 will always evaluate to null because article.lang is undefined.

Either add lang to JinaCacheUpdateSchema if callers can provide it, or simplify:

-      const articleWithDir = { ...article, dir: articleDir, lang: article.lang || null };
+      const articleWithDir = { ...article, dir: articleDir, lang: null };
#!/bin/bash
# Verify JinaCacheUpdateSchema definition
echo "=== JinaCacheUpdateSchema definition ==="
rg -n -A15 'JinaCacheUpdateSchema' types/api.ts
🤖 Prompt for AI Agents
In app/api/jina/route.ts around lines 164 to 166, the code reads article.lang ||
null but JinaCacheUpdateSchema does not define a lang field so article.lang will
always be undefined; either (A) add an optional lang?: string | null to
JinaCacheUpdateSchema in types/api.ts (update the type at lines ~92-104 and
ensure any callers populate it where appropriate), or (B) simplify the route
code to stop referencing article.lang and set lang: null (or omit lang entirely)
when constructing articleWithDir; pick A if callers can/should supply language,
otherwise pick B and remove the redundant check.


// Only update if new article is longer or doesn't exist
if (!validatedExisting || article.length > validatedExisting.length) {
await saveToCache(article);
logger.info({ hostname: new URL(url).hostname, length: article.length }, 'Jina cache updated');
await saveToCache(articleWithDir);
logger.info({ hostname: new URL(url).hostname, length: article.length, dir: articleDir }, 'Jina cache updated');

const response = ArticleResponseSchema.parse({
source: "jina.ai",
cacheURL: `https://r.jina.ai/${url}`,
article: {
...article,
...articleWithDir,
byline: article.byline || "",
dir: "",
dir: articleDir,
lang: "",
publishedTime: article.publishedTime || null,
htmlContent: article.content, // Use markdown-converted HTML as htmlContent
Expand All @@ -187,8 +194,8 @@ export async function POST(request: NextRequest) {
article: {
...validatedExisting,
byline: validatedExisting.byline || "",
dir: "",
lang: "",
dir: validatedExisting.dir || getTextDirection(validatedExisting.lang, validatedExisting.textContent),
lang: validatedExisting.lang || "",
publishedTime: validatedExisting.publishedTime || null,
htmlContent: validatedExisting.content, // Use markdown-converted HTML as htmlContent
},
Expand All @@ -200,14 +207,17 @@ export async function POST(request: NextRequest) {
} catch (error) {
logger.warn({ error: error instanceof Error ? error.message : String(error) }, 'Jina cache update error');

// Detect text direction for the article
const articleDir = getTextDirection(null, article.textContent);

// Return the article even if caching fails
const response = ArticleResponseSchema.parse({
source: "jina.ai",
cacheURL: `https://r.jina.ai/${url}`,
article: {
...article,
byline: article.byline || "",
dir: "",
dir: articleDir,
lang: "",
publishedTime: article.publishedTime || null,
htmlContent: article.content, // Use markdown-converted HTML as htmlContent
Expand Down
44 changes: 44 additions & 0 deletions app/globals.css
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,47 @@
display: none !important;
}
}

/*
 * RTL (Right-to-Left) language support.
 *
 * Every rule below keys off the `dir` attribute, so it only takes effect on
 * (or inside) an element that explicitly carries dir="rtl" / dir="ltr".
 */
@layer base {
  /* Ensure proper text alignment for RTL content */
  [dir="rtl"] {
    text-align: right;
  }

  [dir="ltr"] {
    text-align: left;
  }

  /*
   * RTL-aware prose styling.
   * `[dir="rtl"].prose` and `.prose[dir="rtl"]` are the same compound
   * selector, so a single entry is sufficient.
   */
  .prose[dir="rtl"] {
    text-align: right;
  }

  /*
   * RTL blockquote border adjustment: move the accent border and its padding
   * from the left edge to the right edge.
   * NOTE(review): --border is not defined in this block; presumably it is a
   * theme variable declared elsewhere in the stylesheet.
   */
  [dir="rtl"] blockquote,
  [dir="rtl"].prose blockquote {
    border-left: none;
    border-right: 4px solid var(--border);
    padding-left: 0;
    padding-right: 1rem;
  }

  /* RTL list markers: indent lists from the right instead of the left */
  [dir="rtl"] ol,
  [dir="rtl"] ul {
    padding-left: 0;
    padding-right: 1.5rem;
  }

  /* RTL code blocks */
  [dir="rtl"] pre {
    direction: ltr; /* Keep code LTR */
    text-align: left;
  }

  [dir="rtl"] code {
    direction: ltr; /* Keep inline code LTR */
    /*
     * `direction` alone does not affect bidi reordering of an inline box
     * whose `unicode-bidi` is `normal`; isolating the element makes the LTR
     * base direction actually apply to inline <code>.
     */
    unicode-bidi: isolate;
  }
}
Loading