From 55f11eae71062d0e5c4a7c58de8b622f351e1e56 Mon Sep 17 00:00:00 2001 From: daisyfaithauma Date: Tue, 29 Apr 2025 15:18:58 +0100 Subject: [PATCH 1/3] initial rest API guide --- .../docs/browser-rendering/get-started.mdx | 6 + .../how-to/markdown-extraction.mdx | 178 ++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 src/content/docs/browser-rendering/how-to/markdown-extraction.mdx diff --git a/src/content/docs/browser-rendering/get-started.mdx b/src/content/docs/browser-rendering/get-started.mdx index d75c19a3ac10af5..0a50a8d32bde604 100644 --- a/src/content/docs/browser-rendering/get-started.mdx +++ b/src/content/docs/browser-rendering/get-started.mdx @@ -9,3 +9,9 @@ Browser rendering can be used in two ways: - [Workers Binding API](/browser-rendering/workers-binding-api) for complex scripts. - [REST API](/browser-rendering/rest-api/) for simple actions. + +## Examples + +- [Workers Binding API](/browser-rendering/how-to/ai/): Fetch [https://labs.apnic.net/](https://labs.apnic.net/) and apply a machine-learning model via Workers AI to extract the first post as JSON according to your schema. + +- [REST API](/browser-rendering/how-to/markdown-extraction/): Render and extract the complete JSON output from the [`/markdown` endpoint](/browser-rendering/rest-api/markdown-endpoint) by processing the blog post [Introducing AutoRAG on Cloudflare](https://blog.cloudflare.com/introducing-autorag-on-cloudflare/). 
diff --git a/src/content/docs/browser-rendering/how-to/markdown-extraction.mdx b/src/content/docs/browser-rendering/how-to/markdown-extraction.mdx new file mode 100644 index 000000000000000..cf70be4cbc21c9e --- /dev/null +++ b/src/content/docs/browser-rendering/how-to/markdown-extraction.mdx @@ -0,0 +1,178 @@ +--- +title: Extracting blog post content as markdown using the markdown endpoint +sidebar: + order: 4 +--- + +This guide shows you how to capture the complete JSON output from Cloudflare's [`/markdown` API endpoint](/browser-rendering/rest-api/markdown-endpoint/). + +We are extracting the content of a blog post from the Cloudflare Blog: [Introducing AutoRAG on Cloudflare](https://blog.cloudflare.com/introducing-autorag-on-cloudflare/) + +## Prerequisites + +1. Cloudflare Account and API Token. + + - [Create a token](/fundamentals/api/get-started/create-token/) with **Browser Rendering: Edit** permissions. + - You can do this under **My Profile → API Tokens → Create Token** on your [Cloudflare dashboard](https://dash.cloudflare.com/). + - Note your **Account ID** (from the dashboard homepage) and **API Token**. + +2. Command-line tools installed. + + - cURL: a command-line tool for sending HTTP requests. + - macOS/Linux: usually preinstalled. + - Windows: available via WSL, Git Bash, or native Windows builds. + +## 1: Configure your environment variables + +Save your sensitive information into environment variables to avoid hardcoding credentials. 
+ +```bash +export CF_ACCOUNT_ID="your-cloudflare-account-id" +export CF_API_TOKEN="your-api-token-with-edit-permissions" +``` + +## 2: Make the API Request and save the raw JSON + +Run this command to fetch the markdown representation of the AutoRAG blog post and store it into a local JSON file: + +```bash +curl -s -X POST \ + "https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/browser-rendering/markdown" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${CF_API_TOKEN}" \ + -d '{ + "url": "https://blog.cloudflare.com/introducing-autorag-on-cloudflare/" + }' \ +> autorag-full-response.json +``` + +The `>` parameter redirects output into a file (`autorag-full-response.json`). + +## 3: Inspect the saved JSON + +You can check the start of the saved JSON file to ensure it looks right: + +```bash +head -n 20 autorag-full-response.json +``` + +```json output +{ + "success": true, + "errors": [], + "messages": [], + "result": "# "[Get Started Free](https://dash.cloudflare.com/sign-up)|[Contact Sales](https://www.cloudflare.com/plans/enterprise/contact/)\n\n[![The Cloudflare Blog](https://cf-assets.www.cloudflare ..." +} +``` + +## 4: (Optional) Skip unwanted resources + +To ignore unnecessary assets like CSS, JavaScript, or images when fetching the page add `rejectRequestPattern` parameter: + +```bash +curl -s -X POST \ + "https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/browser-rendering/markdown" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${CF_API_TOKEN}" \ + -d '{ + "url": "https://blog.cloudflare.com/introducing-autorag-on-cloudflare/", + "rejectRequestPattern": [ + "/^.*\\.(css|js|png|svg)$/" + ] + }' \ +> autorag-no-assets.json +``` + +## 5: Extracting and saving the markdown from the JSON file + +After saving the full response, below is how to how to extract just the Markdown. + +The script does the following: + +1. Reads the full JSON response from `autorag-full-response.json` +2. 
Extracts the Markdown string from the `"result"` field +3. Writes that Markdown to `autorag-blog.md` + +```py +#!/usr/bin/env python3 +""" +extract_markdown.py + +Reads the full JSON response from Cloudflare's Markdown endpoint +and writes the 'result' field (the converted Markdown) to a .md file. +""" + +import json +import sys +from pathlib import Path + +# Input and output file paths +INPUT_JSON = Path("autorag-full-response.json") +OUTPUT_MD = Path("autorag-blog.md") + +def main(): + # Check that the input file exists + if not INPUT_JSON.is_file(): + print(f"Error: Input file '{INPUT_JSON}' not found.", file=sys.stderr) + sys.exit(1) + + # Load the JSON response + try: + with INPUT_JSON.open("r", encoding="utf-8") as f: + data = json.load(f) + except json.JSONDecodeError as e: + print(f"Error: Failed to parse JSON in '{INPUT_JSON}': {e}", file=sys.stderr) + sys.exit(1) + + # Validate structure + if not data.get("success", False): + print("Error: API reported failure.", file=sys.stderr) + errors = data.get("errors") or data.get("messages") + if errors: + print("Details:", errors, file=sys.stderr) + sys.exit(1) + + if "result" not in data: + print("Error: 'result' field not found in JSON.", file=sys.stderr) + sys.exit(1) + + # Extract and write the Markdown + markdown_content = data["result"] + try: + with OUTPUT_MD.open("w", encoding="utf-8") as md_file: + md_file.write(markdown_content) + except IOError as e: + print(f"Error: Could not write to '{OUTPUT_MD}': {e}", file=sys.stderr) + sys.exit(1) + + print(f"Success: Markdown content written to '{OUTPUT_MD}'.") + +if __name__ == "__main__": + main() +``` + +### Usage + +1. Ensure you have run the `curl` command to produce `autorag-full-response.json`. + +2. Place `extract_markdown.py` in the same directory. + +3. Run: + +``` +python3 extract_markdown.py +``` + +After execution, `autorag-blog.md` will contain the extracted Markdown. 
+ +## Final folder structure + +After following these steps, your working folder will look like: + +``` +. +├── autorag-full-response.json # Full API response +├── autorag-no-assets.json # Full API response without extra assets (optional) +├── autorag-blog.md # Extracted Markdown content +└── extract_markdown.py # Python extraction script (optional) +``` From 19c225ff61229f240b69acf3eaaa02c0e4cd5b20 Mon Sep 17 00:00:00 2001 From: daisyfaithauma Date: Tue, 6 May 2025 12:43:32 +0300 Subject: [PATCH 2/3] Pricing details --- src/content/docs/browser-rendering/platform/pricing.mdx | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/content/docs/browser-rendering/platform/pricing.mdx diff --git a/src/content/docs/browser-rendering/platform/pricing.mdx b/src/content/docs/browser-rendering/platform/pricing.mdx new file mode 100644 index 000000000000000..64490c9d8bf86a6 --- /dev/null +++ b/src/content/docs/browser-rendering/platform/pricing.mdx @@ -0,0 +1,8 @@ +--- +pcx_content_type: configuration +title: Pricing +sidebar: + order: 31 +--- + +The Browser Rendering service is currently available at no cost, up to the specified [limits](/browser-rendering/platform/limits/), until billing begins. Pricing will be announced, and we will provide advance notice before any billing begins. From 14e73c4aed8e92d1e0474fdfdd876028fe7e094b Mon Sep 17 00:00:00 2001 From: daisyfaithauma Date: Tue, 6 May 2025 12:47:09 +0300 Subject: [PATCH 3/3] Revert "initial rest API guide" This reverts commit 55f11eae71062d0e5c4a7c58de8b622f351e1e56. 
--- .../docs/browser-rendering/get-started.mdx | 6 - .../how-to/markdown-extraction.mdx | 178 ------------------ 2 files changed, 184 deletions(-) delete mode 100644 src/content/docs/browser-rendering/how-to/markdown-extraction.mdx diff --git a/src/content/docs/browser-rendering/get-started.mdx b/src/content/docs/browser-rendering/get-started.mdx index 0a50a8d32bde604..d75c19a3ac10af5 100644 --- a/src/content/docs/browser-rendering/get-started.mdx +++ b/src/content/docs/browser-rendering/get-started.mdx @@ -9,9 +9,3 @@ Browser rendering can be used in two ways: - [Workers Binding API](/browser-rendering/workers-binding-api) for complex scripts. - [REST API](/browser-rendering/rest-api/) for simple actions. - -## Examples - -- [Workers Binding API](/browser-rendering/how-to/ai/): Fetch [https://labs.apnic.net/](https://labs.apnic.net/) and apply a machine-learning model via Workers AI to extract the first post as JSON according to your schema. - -- [REST API](/browser-rendering/how-to/markdown-extraction/): Render and extract the complete JSON output from the [`/markdown` endpoint](/browser-rendering/rest-api/markdown-endpoint) by processing the blog post [Introducing AutoRAG on Cloudflare](https://blog.cloudflare.com/introducing-autorag-on-cloudflare/). diff --git a/src/content/docs/browser-rendering/how-to/markdown-extraction.mdx b/src/content/docs/browser-rendering/how-to/markdown-extraction.mdx deleted file mode 100644 index cf70be4cbc21c9e..000000000000000 --- a/src/content/docs/browser-rendering/how-to/markdown-extraction.mdx +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: Extracting blog post content as markdown using the markdown endpoint -sidebar: - order: 4 ---- - -This guide shows you how to capture the complete JSON output from Cloudflare's [`/markdown` API endpoint](/browser-rendering/rest-api/markdown-endpoint/). 
- -We are extracting the content of a blog post from the Cloudflare Blog: [Introducing AutoRAG on Cloudflare](https://blog.cloudflare.com/introducing-autorag-on-cloudflare/) - -## Prerequisites - -1. Cloudflare Account and API Token. - - - [Create a token](/fundamentals/api/get-started/create-token/) with **Browser Rendering: Edit** permissions. - - You can do this under **My Profile → API Tokens → Create Token** on your [Cloudflare dashboard](https://dash.cloudflare.com/). - - Note your **Account ID** (from the dashboard homepage) and **API Token**. - -2. Command-line tools installed. - - - cURL: a command-line tool for sending HTTP requests. - - macOS/Linux: usually preinstalled. - - Windows: available via WSL, Git Bash, or native Windows builds. - -## 1: Configure your environment variables - -Save your sensitive information into environment variables to avoid hardcoding credentials. - -```bash -export CF_ACCOUNT_ID="your-cloudflare-account-id" -export CF_API_TOKEN="your-api-token-with-edit-permissions" -``` - -## 2: Make the API Request and save the raw JSON - -Run this command to fetch the markdown representation of the AutoRAG blog post and store it into a local JSON file: - -```bash -curl -s -X POST \ - "https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/browser-rendering/markdown" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${CF_API_TOKEN}" \ - -d '{ - "url": "https://blog.cloudflare.com/introducing-autorag-on-cloudflare/" - }' \ -> autorag-full-response.json -``` - -The `>` parameter redirects output into a file (`autorag-full-response.json`). 
- -## 3: Inspect the saved JSON - -You can check the start of the saved JSON file to ensure it looks right: - -```bash -head -n 20 autorag-full-response.json -``` - -```json output -{ - "success": true, - "errors": [], - "messages": [], - "result": "# "[Get Started Free](https://dash.cloudflare.com/sign-up)|[Contact Sales](https://www.cloudflare.com/plans/enterprise/contact/)\n\n[![The Cloudflare Blog](https://cf-assets.www.cloudflare ..." -} -``` - -## 4: (Optional) Skip unwanted resources - -To ignore unnecessary assets like CSS, JavaScript, or images when fetching the page add `rejectRequestPattern` parameter: - -```bash -curl -s -X POST \ - "https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/browser-rendering/markdown" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${CF_API_TOKEN}" \ - -d '{ - "url": "https://blog.cloudflare.com/introducing-autorag-on-cloudflare/", - "rejectRequestPattern": [ - "/^.*\\.(css|js|png|svg)$/" - ] - }' \ -> autorag-no-assets.json -``` - -## 5: Extracting and saving the markdown from the JSON file - -After saving the full response, below is how to how to extract just the Markdown. - -The script does the following: - -1. Reads the full JSON response from `autorag-full-response.json` -2. Extracts the Markdown string from the `"result"` field -3. Writes that Markdown to `autorag-blog.md` - -```py -#!/usr/bin/env python3 -""" -extract_markdown.py - -Reads the full JSON response from Cloudflare's Markdown endpoint -and writes the 'result' field (the converted Markdown) to a .md file. 
-""" - -import json -import sys -from pathlib import Path - -# Input and output file paths -INPUT_JSON = Path("autorag-full-response.json") -OUTPUT_MD = Path("autorag-blog.md") - -def main(): - # Check that the input file exists - if not INPUT_JSON.is_file(): - print(f"Error: Input file '{INPUT_JSON}' not found.", file=sys.stderr) - sys.exit(1) - - # Load the JSON response - try: - with INPUT_JSON.open("r", encoding="utf-8") as f: - data = json.load(f) - except json.JSONDecodeError as e: - print(f"Error: Failed to parse JSON in '{INPUT_JSON}': {e}", file=sys.stderr) - sys.exit(1) - - # Validate structure - if not data.get("success", False): - print("Error: API reported failure.", file=sys.stderr) - errors = data.get("errors") or data.get("messages") - if errors: - print("Details:", errors, file=sys.stderr) - sys.exit(1) - - if "result" not in data: - print("Error: 'result' field not found in JSON.", file=sys.stderr) - sys.exit(1) - - # Extract and write the Markdown - markdown_content = data["result"] - try: - with OUTPUT_MD.open("w", encoding="utf-8") as md_file: - md_file.write(markdown_content) - except IOError as e: - print(f"Error: Could not write to '{OUTPUT_MD}': {e}", file=sys.stderr) - sys.exit(1) - - print(f"Success: Markdown content written to '{OUTPUT_MD}'.") - -if __name__ == "__main__": - main() -``` - -### Usage - -1. Ensure you have run the `curl` command to produce `autorag-full-response.json`. - -2. Place `extract_markdown.py` in the same directory. - -3. Run: - -``` -python3 extract_markdown.py -``` - -After execution, `autorag-blog.md` will contain the extracted Markdown. - -## Final folder structure - -After following these steps, your working folder will look like: - -``` -. -├── autorag-full-response.json # Full API response -├── autorag-no-assets.json # Full API response without extra assets (optional) -├── autorag-blog.md # Extracted Markdown content -└── extract_markdown.py # Python extraction script (optional) -```