Skip to content

Commit 93a8e00

Browse files
authored
Merge branch 'master' into turn-back-Apify-vale-rules
2 parents cc6c41f + 42affbc commit 93a8e00

File tree

8 files changed

+199
-3
lines changed

8 files changed

+199
-3
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Validates nginx.conf on every pull request that touches it.
name: Nginx configuration test

on:
  pull_request:
    paths:
      - nginx.conf

jobs:
  nginx-test-job:
    runs-on: ubuntu-latest
    # Run inside the nginx image so the `nginx` binary is available for `nginx -t`.
    container:
      image: docker.io/library/nginx:1.28.0-alpine-slim
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Test Nginx configuration
        # nginx.conf is not passed to `nginx -t` directly: a wrapper config
        # includes it inside an `http {}` block and adds an empty `events {}` block.
        # The heredoc delimiter is quoted so the wrapper is written verbatim, and
        # the config path is quoted so a workspace path with spaces cannot split it.
        run: |
          cat > default.conf <<'EOF'
          http {
              include 'nginx.conf';
          }
          events {}
          EOF
          nginx -t -c "$(pwd)/default.conf"

docusaurus.config.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,18 @@ module.exports = {
361361
const ogImageURL = new URL('https://apify.com/og-image/docs-article');
362362
ogImageURL.searchParams.set('title', result.frontMatter.title);
363363
result.frontMatter.image ??= ogImageURL.toString();
364+
365+
// Remove import statements and JSX/MDX tags from content
366+
const contentText = result.content
367+
.replace(/import\s+[^;]+;?/g, '') // Remove import statements
368+
.replace(/<[^>]+>/g, '') // Remove all tags (JSX/MDX)
369+
.replace(/\n+/g, ' ') // Replace newlines with space
370+
.replace(/\s+/g, ' ') // Collapse whitespace
371+
.trim();
372+
373+
const sentenceMatch = contentText.match(/^(.*?[.!?])\s/);
374+
375+
result.frontMatter.description = sentenceMatch ? sentenceMatch[1].trim() : contentText;
364376
}
365377

366378
return result;

nginx.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,9 @@ server {
421421
# Actor marketing playbook addition
422422
rewrite ^/academy/get-most-of-actors$ /academy/actor-marketing-playbook permanent;
423423

424+
# Misc
425+
rewrite ^/platform/integrations/llama$ /platform/integrations/llama-index permanent;
426+
424427
}
425428

426429
# Temporarily used to route crawlee.dev to the Crawlee GitHub pages.

package-lock.json

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
"lint:code": "eslint .",
4242
"lint:code:fix": "eslint . --fix",
4343
"postinstall": "patch-package",
44-
"postbuild": "node ./scripts/joinLlmsFiles.mjs"
44+
"postbuild": "node ./scripts/joinLlmsFiles.mjs && node ./scripts/indentLlmsFile.mjs"
4545
},
4646
"devDependencies": {
4747
"@apify/eslint-config": "^1.0.0",

scripts/indentLlmsFile.mjs

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
import fs from 'node:fs/promises';
2+
import path from 'node:path';
3+
4+
// `build` directory, resolved against the current working directory.
const BUILD_DIR = path.resolve('build');
// The llms.txt file inside BUILD_DIR; this script rewrites it in place.
const LLMS_FILE = path.join(BUILD_DIR, 'llms.txt');

// Number of spaces that make up one indentation level.
const INDENT_LEVEL = 2;

// URL paths that are always treated as main (level 0) section pages.
const MAIN_SECTIONS = ['/api.md', '/api/v2.md'];

// Base URL that documentation links must start with; an unset or empty
// APIFY_DOCS_ABSOLUTE_URL falls back to the default.
const BASE_URL = process.env.APIFY_DOCS_ABSOLUTE_URL || 'https://docs.apify.com';
12+
13+
/**
 * Extracts the path component of an absolute URL, dropping the origin,
 * the query string and the hash fragment.
 *
 * @param {string} url - Absolute URL, e.g. `https://docs.apify.com/api/v2.md?x=1`.
 * @returns {string} The URL's pathname, e.g. `/api/v2.md`.
 * @throws {TypeError} If `url` cannot be parsed as an absolute URL.
 */
function extractPathFromUrl(url) {
    return new URL(url).pathname;
}
20+
21+
/**
 * Calculates the hierarchical depth of a URL path.
 *
 * The depth is the number of non-empty path segments that do not end with
 * `.md`, so a trailing `page.md` filename is not counted while every other
 * segment is.
 *
 * @param {string} url - Absolute URL of a documentation page.
 * @returns {number} Number of counted path segments (0 for a root-level `.md` page).
 */
function getUrlHierarchyDepth(url) {
    return extractPathFromUrl(url)
        .split('/')
        .filter((segment) => segment !== '' && !segment.endsWith('.md'))
        .length;
}
32+
33+
/**
 * Determines whether a URL is a main section page (indentation level 0).
 *
 * A page is a main section page when its path has exactly one non-empty
 * segment, or when its path is listed in MAIN_SECTIONS (the special-cased
 * main API pages).
 *
 * @param {string} url - Absolute URL of a documentation page.
 * @returns {boolean} `true` for main section pages, `false` otherwise.
 */
function isMainSectionPage(url) {
    const urlPath = extractPathFromUrl(url);
    const segmentCount = urlPath.split('/').filter((segment) => segment !== '').length;

    return segmentCount === 1 || MAIN_SECTIONS.includes(urlPath);
}
52+
53+
/**
 * Determines the indentation (in spaces) for a documentation link based on
 * its URL hierarchy.
 *
 * Main section pages are not indented. Every other page is indented by
 * INDENT_LEVEL spaces per level of URL depth, capped at four levels.
 *
 * @param {string} url - Absolute URL of the linked documentation page.
 * @returns {number} Number of spaces to indent the link with.
 */
function getLinkIndentation(url) {
    if (isMainSectionPage(url)) {
        return 0;
    }

    const maxIndent = INDENT_LEVEL * 4;
    const depthIndent = getUrlHierarchyDepth(url) * INDENT_LEVEL;

    return Math.min(depthIndent, maxIndent);
}
68+
69+
// BASE_URL with regex metacharacters escaped so it can be embedded in a pattern.
const ESCAPED_BASE_URL = BASE_URL.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

// Captures the URL of a markdown link target that points below BASE_URL,
// e.g. "- [API Reference](https://docs.apify.com/api/v2)" → "https://docs.apify.com/api/v2".
// Compiled once because BASE_URL does not change while the script runs.
const DOCS_LINK_URL_REGEX = new RegExp(`\\]\\((${ESCAPED_BASE_URL}/[^)]+)\\)`);

/**
 * Determines the indentation (in spaces) for a line based on its content
 * type and, for documentation links, its URL.
 *
 * - `# ` and `## ` headings: no indentation.
 * - `### ` headings: one level; `#### ` headings: two levels.
 * - `- [text](BASE_URL/...)` links: indentation derived from the URL hierarchy.
 * - Anything else: the leading-whitespace width of `allLines[lineIndex - 1]`,
 *   or one level when there is no previous line.
 *
 * @param {string} line - The line to classify, without leading whitespace.
 * @param {number} lineIndex - Index of the line within `allLines`.
 * @param {string[]} allLines - Lines whose entry at `lineIndex - 1` supplies
 *   the indentation inherited by plain content.
 * @returns {number} Number of spaces to indent the line with.
 */
function getIndentationLevel(line, lineIndex, allLines) {
    if (line.startsWith('# ') || line.startsWith('## ')) {
        return 0;
    }

    if (line.startsWith('### ')) {
        return INDENT_LEVEL;
    }

    if (line.startsWith('#### ')) {
        return INDENT_LEVEL * 2;
    }

    // Handle markdown links that point at the documentation site.
    if (line.startsWith('- [') && line.includes(`](${BASE_URL}/`)) {
        const urlMatch = line.match(DOCS_LINK_URL_REGEX);
        return urlMatch ? getLinkIndentation(urlMatch[1]) : INDENT_LEVEL;
    }

    // Other content inherits the indentation of the previous line.
    if (lineIndex > 0) {
        // /^\s*/ always matches (possibly the empty string), so no fallback is needed.
        return allLines[lineIndex - 1].match(/^\s*/)[0].length;
    }

    return INDENT_LEVEL;
}
105+
106+
/**
 * Applies hierarchical indentation to content based on URL structure and
 * content type.
 *
 * Every line is trimmed and re-indented with the number of spaces returned by
 * getIndentationLevel. Blank (whitespace-only) lines are emitted as empty
 * lines.
 *
 * @param {string} content - Full text to re-indent, with `\n` line separators.
 * @returns {string} The re-indented text, joined with `\n`.
 */
function indentContent(content) {
    const indentedLines = [];

    for (const [lineIndex, rawLine] of content.split('\n').entries()) {
        const trimmedLine = rawLine.trim();

        // Preserve empty lines (add them without indentation)
        if (!trimmedLine) {
            indentedLines.push('');
            continue;
        }

        // Pass the already re-indented output rather than the raw input: content
        // that inherits "the previous line's indentation" must follow the
        // indentation just assigned to that line, not the whitespace it had
        // before trimming. Exactly one entry is pushed per input line, so
        // indentedLines[lineIndex - 1] is always the previous output line.
        const indent = getIndentationLevel(trimmedLine, lineIndex, indentedLines);
        indentedLines.push(' '.repeat(indent) + trimmedLine);
    }

    return indentedLines.join('\n');
}
130+
131+
/**
 * Main function to indent the LLMs file.
 * Reads LLMS_FILE, applies indentation, and writes the result back in place.
 *
 * A missing file is not an error: the step is skipped with a log message.
 * Any other failure is logged and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function indentLlmsFile() {
    try {
        // No separate existence check: readFile itself rejects with ENOENT when
        // the file is missing, which avoids a check-then-use race.
        const content = await fs.readFile(LLMS_FILE, 'utf8');
        await fs.writeFile(LLMS_FILE, indentContent(content), 'utf8');
        console.log('Successfully indented llms.txt file');
    } catch (error) {
        if (error.code === 'ENOENT') {
            console.log('llms.txt file not found, skipping indentation');
            return;
        }

        console.error('Error indenting llms.txt file:', error);
        process.exit(1);
    }
}
151+
152+
// Script entry point: run the indentation pass using top-level await.
await indentLlmsFile();

sources/platform/actors/development/programming_interface/metamorph.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ These benefits make metamorph a valuable tool for creating complex, efficient wo
3333

3434
## Implementation guidelines
3535

36-
To make your Actor compatible with metamorph, use `Actor.getInput()` instead of `Actor.getValue(&#96;INPUT&#96;)`. This method fetches the input using the correct key (_INPUT-METAMORPH-1_) for metamorphed runs, ensuring proper data retrieval in transformed Actor runs.
36+
To make your Actor compatible with metamorph, use `Actor.getInput()` instead of `Actor.getValue('INPUT')`. This method fetches the input using the correct key (_INPUT-METAMORPH-1_) for metamorphed runs, ensuring proper data retrieval in transformed Actor runs.
3737

3838
:::note Runtime limits
3939

sources/platform/integrations/ai/llama.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22
title: LlamaIndex integration
33
description: Learn how to integrate Apify with LlamaIndex in order to feed vector databases and LLMs with data crawled from the web.
4-
sidebar_label: Llamaindex
4+
sidebar_label: LlamaIndex
55
sidebar_position: 6
66
slug: /integrations/llama
77
---

0 commit comments

Comments
 (0)