|
24 | 24 | "default": {},
|
25 | 25 | "editor": "proxy",
|
26 | 26 | "prefill": {
|
27 |
| - "useApifyProxy": false |
| 27 | + "useApifyProxy": true, |
| 28 | + "apifyProxyGroups": [ |
| 29 | + "SHADER", |
| 30 | + "BUYPROXIES94952", |
| 31 | + "RESIDENTIAL" |
| 32 | + ] |
28 | 33 | }
|
29 | 34 | },
|
30 | 35 | "checkers.cheerio": {
|
|
47 | 52 | "title": "Playwright",
|
48 | 53 | "type": "boolean",
|
49 | 54 | "description": "Crawl with Playwright",
|
50 |
| - "editor": "checkbox" |
| 55 | + "editor": "checkbox", |
| 56 | + "default": true |
51 | 57 | },
|
52 | 58 | "saveSnapshot": {
|
53 | 59 | "title": "Enabled",
|
54 | 60 | "type": "boolean",
|
55 | 61 | "description": "Will save HTML for Cheerio and HTML + screenshot for Puppeteer/Playwright",
|
56 | 62 | "editor": "checkbox",
|
57 |
| - "groupCaption": "Save Snapshots" |
| 63 | + "groupCaption": "Save Snapshots", |
| 64 | + "default": true |
58 | 65 | },
|
59 | 66 | "enqueueAllOnDomain": {
|
60 | 67 | "title": "Enqueue any URL on domain (no need for link selector or pseudo URLs)",
|
61 | 68 | "type": "boolean",
|
62 | 69 | "description": "Will enqueue any URLs on the domain",
|
63 |
| - "default": false |
| 70 | + "default": true |
64 | 71 | },
|
65 | 72 | "linkSelector": {
|
66 | 73 | "title": "Link Selector",
|
67 | 74 | "type": "string",
|
68 | 75 | "description": "A CSS selector saying which links on the page (<code>&lt;a&gt;</code> elements with <code>href</code> attribute) shall be followed and added to the request queue. This setting only applies if <b>Use request queue</b> is enabled. To filter the links added to the queue, use the <b>Pseudo-URLs</b> setting.<br><br>If <b>Link selector</b> is empty, the page links are ignored.<br><br>For details, see <a href='https://apify.com/apify/web-scraper#link-selector' target='_blank' rel='noopener'>Link selector</a> in README.",
|
69 | 76 | "sectionCaption": "Crawler Options",
|
70 | 77 | "sectionDescription": "Specific options that are relevant for crawlers",
|
71 |
| - "editor": "textfield", |
72 |
| - "prefill": "a[href]", |
73 |
| - "minLength": 1 |
| 78 | + "editor": "textfield" |
74 | 79 | },
|
75 | 80 | "pseudoUrls": {
|
76 | 81 | "title": "Pseudo-URLs",
|
77 | 82 | "type": "array",
|
78 | 83 | "description": "Specifies what kind of URLs found by <b>Link selector</b> should be added to the request queue. A pseudo-URL is a URL with regular expressions enclosed in <code>[]</code> brackets, e.g. <code>http://www.example.com/[.*]</code>. This setting only applies if the <b>Use request queue</b> option is enabled.<br><br>If <b>Pseudo-URLs</b> are omitted, the actor enqueues all links matched by the <b>Link selector</b>.<br><br>For details, see <a href='https://apify.com/apify/web-scraper#pseudo-urls' target='_blank' rel='noopener'>Pseudo-URLs</a> in README.",
|
79 | 84 | "default": [],
|
80 |
| - "editor": "pseudoUrls", |
81 |
| - "prefill": [ |
82 |
| - { |
83 |
| - "purl": "https://www.amazon.com[.*]/dp/[.*]" |
84 |
| - } |
85 |
| - ] |
| 85 | + "editor": "pseudoUrls" |
86 | 86 | },
|
87 | 87 | "repeatChecksOnProvidedUrls": {
|
88 | 88 | "title": "Repeat checks on provided URLs",
|
89 | 89 | "type": "integer",
|
90 | 90 | "description": "Will access each URL multiple times. Useful to test the same URL or bypass blocking of the first page.",
|
91 |
| - "editor": "number" |
| 91 | + "editor": "number", |
| 92 | + "prefill": 10 |
92 | 93 | },
|
93 | 94 | "maxNumberOfPagesCheckedPerDomain": {
|
94 | 95 | "title": "Max number of pages checked per domain",
|
95 | 96 | "type": "integer",
|
96 | 97 | "description": "The maximum number of pages that the checker will load. The checker will stop when this limit is reached. It's always a good idea to set this limit in order to prevent excess platform usage for misconfigured scrapers. Note that the actual number of pages loaded might be slightly higher than this value.<br><br>If set to <code>0</code>, there is no limit.",
|
97 |
| - "default": 100, |
| 98 | + "prefill": 1000, |
98 | 99 | "editor": "number"
|
99 | 100 | },
|
100 | 101 | "maxConcurrentPagesCheckedPerDomain": {
|
101 | 102 | "title": "Maximum concurrent pages checked per domain",
|
102 | 103 | "type": "integer",
|
103 | 104 | "description": "Specifies the maximum number of pages that can be processed by the checker in parallel for one domain. The checker automatically increases and decreases concurrency based on available system resources. This option enables you to set an upper limit, for example to reduce the load on a target website.",
|
104 |
| - "default": 50, |
| 105 | + "default": 500, |
105 | 106 | "editor": "number",
|
106 | 107 | "minimum": 1
|
107 | 108 | },
|
|
147 | 148 | "title": "Wait for",
|
148 | 149 | "type": "string",
|
149 | 150 | "description": "Only works for Puppeteer type. Will wait on each page. You can provide a number in ms or a selector.",
|
150 |
| - "editor": "textfield" |
| 151 | + "editor": "textfield", |
| 152 | + "default": "2000" |
151 | 153 | },
|
152 | 154 | "puppeteer.memory": {
|
153 | 155 | "title": "Memory",
|
|
162 | 164 | "title": "Chrome",
|
163 | 165 | "type": "boolean",
|
164 | 166 | "description": "Use Chrome when checking",
|
165 |
| - "default": true, |
| 167 | + "default": false, |
166 | 168 | "sectionCaption": "Playwright options",
|
167 | 169 | "sectionDescription": "Options passed to playwright when checking",
|
168 | 170 | "editor": "checkbox",
|
|
173 | 175 | "title": "Firefox",
|
174 | 176 | "type": "boolean",
|
175 | 177 | "description": "Use Firefox when checking",
|
176 |
| - "editor": "checkbox" |
| 178 | + "editor": "checkbox", |
| 179 | + "default": true |
177 | 180 | },
|
178 | 181 | "playwright.webkit": {
|
179 | 182 | "title": "Safari (Webkit)",
|
|
197 | 200 | "title": "Wait for",
|
198 | 201 | "type": "string",
|
199 | 202 | "description": "Only works for Playwright type. Will wait on each page. You can provide a number in ms or a selector.",
|
200 |
| - "editor": "textfield" |
| 203 | + "editor": "textfield", |
| 204 | + "default": "2000" |
201 | 205 | },
|
202 | 206 | "playwright.memory": {
|
203 | 207 | "title": "Memory",
|
|
0 commit comments