@@ -36,6 +36,136 @@ let adapter = HttpAdapter::with_config(HttpConfig {
3636
3737---
3838
39+ ## REST API Adapter
40+
41+ Purpose-built for structured JSON REST APIs. Handles authentication, automatic
42+ multi-strategy pagination, JSON response extraction, and retry — without the caller
43+ needing to manage any of that manually.
44+
45+ ```rust
46+ use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
47+ use stygian_graph::ports::{ScrapingService, ServiceInput};
48+ use serde_json::json;
49+ use std::time::Duration;
50+
51+ let adapter = RestApiAdapter::with_config(RestApiConfig {
52+     timeout: Duration::from_secs(20),
53+     max_retries: 3,
54+     ..Default::default()
55+ });
56+
57+ let input = ServiceInput {
58+     url: "https://api.github.com/repos/rust-lang/rust/issues".to_string(),
59+     params: json!({
60+         "auth": { "type": "bearer", "token": "${env:GITHUB_TOKEN}" },
61+         "query": { "state": "open", "per_page": "100" },
62+         "pagination": { "strategy": "link_header", "max_pages": 10 },
63+         "response": { "data_path": "" }
64+     }),
65+ };
66+ // let output = adapter.execute(input).await?;
67+ ```
68+
69+ **Registered service name**: `"rest-api"`
70+
71+ ### Config fields
72+
73+ | Field | Default | Description |
74+ | ---| ---| ---|
75+ | ` timeout ` | 30 s | Per-request timeout |
76+ | ` max_retries ` | 3 | Retry attempts on transient errors (` 429 ` , ` 5xx ` , network) |
77+ | ` retry_base_delay ` | 1 s | Base for exponential backoff |
78+ | ` proxy_url ` | ` None ` | HTTP/HTTPS/SOCKS5 proxy URL |
79+
80+ ### ` ServiceInput.params ` contract
81+
82+ | Param | Required | Default | Description |
83+ | ---| ---| ---| ---|
84+ | ` method ` | — | ` "GET" ` | ` GET ` , ` POST ` , ` PUT ` , ` PATCH ` , ` DELETE ` , ` HEAD ` |
85+ | ` body ` | — | — | JSON body for ` POST ` /` PUT ` /` PATCH ` |
86+ | ` body_raw ` | — | — | Raw string body (takes precedence over ` body ` ) |
87+ | ` headers ` | — | — | Extra request headers object |
88+ | ` query ` | — | — | Extra query string parameters object |
89+ | ` accept ` | — | ` "application/json" ` | ` Accept ` header |
90+ | ` auth ` | — | none | Authentication object (see below) |
91+ | ` response.data_path ` | — | full body | Dot path into the JSON response to extract |
92+ | ` response.collect_as_array ` | — | ` false ` | Force multi-page results into a JSON array |
93+ | ` pagination.strategy ` | — | ` "none" ` | ` "none" ` , ` "offset" ` , ` "cursor" ` , ` "link_header" ` |
94+ | ` pagination.max_pages ` | — | ` 1 ` | Maximum pages to fetch |
95+
96+ ### Authentication
97+
98+ ```toml
99+ # Bearer token
100+ [nodes.params.auth]
101+ type = "bearer"
102+ token = "${env:API_TOKEN}"
103+
104+ # HTTP Basic
105+ [nodes.params.auth]
106+ type = "basic"
107+ username = "${env:API_USER}"
108+ password = "${env:API_PASS}"
109+
110+ # API key in header
111+ [nodes.params.auth]
112+ type = "api_key_header"
113+ header = "X-Api-Key"
114+ key = "${env:API_KEY}"
115+
116+ # API key in query string
117+ [nodes.params.auth]
118+ type = "api_key_query"
119+ param = "api_key"
120+ key = "${env:API_KEY}"
121+ ```
122+
123+ ### Pagination strategies
124+
125+ | Strategy | How it works | Best for |
126+ | ---| ---| ---|
127+ | ` "none" ` | Single request | Simple endpoints |
128+ | ` "offset" ` | Increments ` page_param ` from ` start_page ` | REST APIs with ` ?page=N ` |
129+ | ` "cursor" ` | Extracts next cursor from ` cursor_field ` (dot path), sends as ` cursor_param ` | GraphQL-REST hybrids, Stripe-style |
130+ | ` "link_header" ` | Follows RFC 8288 ` Link: <url>; rel="next" ` | GitHub API, GitLab API |
131+
132+ #### Offset example
133+
134+ ```toml
135+ [nodes.params.pagination]
136+ strategy = "offset"
137+ page_param = "page"
138+ page_size_param = "per_page"
139+ page_size = 100
140+ start_page = 1
141+ max_pages = 20
142+ ```
143+
144+ #### Cursor example
145+
146+ ```toml
147+ [nodes.params.pagination]
148+ strategy = "cursor"
149+ cursor_param = "after"
150+ cursor_field = "meta.next_cursor"
151+ max_pages = 50
152+ ```
153+
154+ ### Output
155+
156+ ` ServiceOutput.data ` — pretty-printed JSON string of the extracted data.
157+
158+ ` ServiceOutput.metadata ` :
159+
160+ ```json
161+ {
162+   "url": "https://...",
163+   "page_count": 3
164+ }
165+ ```
166+
167+ ---
168+
39169## Browser Adapter
40170
41171Delegates to ` stygian-browser ` for JavaScript-rendered pages. Requires the ` browser `
@@ -260,3 +390,104 @@ let service = GraphQlService::new(GraphQlConfig::default(), Some(Arc::new(regist
260390
261391See the [GraphQL Plugins](./graphql-plugins.md) page for the full builder reference,
262392` AuthPort ` implementation guide, proactive cost throttling, and custom plugin examples.
393+
394+ ---
395+
396+ ## Cloudflare Browser Rendering Adapter
397+
398+ Submits a multi-page crawl job to the [Cloudflare Browser Rendering API](https://developers.cloudflare.com/browser-rendering/),
399+ polls until it completes, and returns the aggregated content. All page rendering is done
400+ inside Cloudflare's infrastructure — no local Chrome binary needed.
401+
402+ **Feature flag**: `cloudflare-crawl` (not included in `default` or `browser`; add it
403+ explicitly or use `full`).
404+
405+ ### Quick start
406+
407+ ```toml
408+ # Cargo.toml
409+ [dependencies]
410+ stygian-graph = { version = "0.1", features = ["cloudflare-crawl"] }
411+ ```
412+
413+ ```rust
414+ use stygian_graph::adapters::cloudflare_crawl::{
415+     CloudflareCrawlAdapter, CloudflareCrawlConfig,
416+ };
417+ use std::time::Duration;
418+
419+ let adapter = CloudflareCrawlAdapter::with_config(CloudflareCrawlConfig {
420+     poll_interval: Duration::from_secs(3),
421+     job_timeout: Duration::from_secs(120),
422+     ..Default::default()
423+ });
424+ ```
425+
426+ **Registered service name**: `"cloudflare-crawl"`
427+
428+ ### ` ServiceInput.params ` contract
429+
430+ All per-request options are passed via `ServiceInput.params`. `account_id` and
431+ `api_token` are **required**; the rest are optional and forwarded verbatim to the
432+ Cloudflare API.
433+
434+ | Param key | Required | Default | Description |
435+ | ---| ---| ---| ---|
436+ | ` account_id ` | ✅ | — | Cloudflare account ID |
437+ | ` api_token ` | ✅ | — | Cloudflare API token with Browser Rendering permission |
438+ | ` output_format ` | — | ` "markdown" ` | ` "markdown" ` , ` "html" ` , or ` "raw" ` |
439+ | ` max_depth ` | — | API default | Maximum crawl depth from the seed URL |
440+ | ` max_pages ` | — | API default | Maximum pages to crawl |
441+ | ` url_pattern ` | — | API default | Regex or glob restricting which URLs are followed |
442+ | `modified_since` | — | API default | ISO-8601 timestamp; skip pages that have not been modified since this time |
443+ | ` max_age_seconds ` | — | API default | Skip cached pages older than this many seconds |
444+ | ` static_mode ` | — | ` false ` | Set ` "true" ` to skip JS execution (faster, static HTML only) |
445+
446+ ### Config fields
447+
448+ | Field | Default | Description |
449+ | ---| ---| ---|
450+ | ` poll_interval ` | 2 s | How often to poll for job completion |
451+ | ` job_timeout ` | 5 min | Hard timeout per crawl job; returns ` ServiceError::Timeout ` if exceeded |
452+
453+ ### Output
454+
455+ ` ServiceOutput.data ` contains the page content of all crawled pages joined by newlines.
456+ ` ServiceOutput.metadata ` is a JSON object:
457+
458+ ```json
459+ {
460+   "job_id": "some-uuid",
461+   "pages": 12,
462+   "url_count": 12
463+ }
464+ ```
465+
466+ ### TOML pipeline usage
467+
468+ ```toml
469+ [[nodes]]
470+ id = "crawl"
471+ type = "scrape"
472+ target = "https://docs.example.com"
473+
474+ [nodes.params]
475+ account_id = "${env:CF_ACCOUNT_ID}"
476+ api_token = "${env:CF_API_TOKEN}"
477+ output_format = "markdown"
478+ max_depth = "3"
479+ max_pages = "50"
480+ url_pattern = "https://docs.example.com/**"
481+
482+ [nodes.service]
483+ name = "cloudflare-crawl"
484+ ```
485+
486+ ### Error mapping
487+
488+ | Condition | ` StygianError ` variant |
489+ | ---| ---|
490+ | Missing ` account_id ` or ` api_token ` | ` ServiceError::Unavailable ` |
491+ | Cloudflare API non-2xx | ` ServiceError::Unavailable ` (with CF error code) |
492+ | Job still pending after ` job_timeout ` | ` ServiceError::Timeout ` |
493+ | Unexpected response shape | ` ServiceError::InvalidResponse ` |
0 commit comments