|
9 | 9 |
|
10 | 10 | graph_config = { |
11 | 11 | "llm": { |
12 | | - "model": "ollama/llama3", |
| 12 | + "model": "ollama/llama3.1:8b", |
13 | 13 | "temperature": 0, |
14 | 14 | "format": "json", # Ollama requires the output format to be specified explicitly |
15 | 15 | # "base_url": "http://localhost:11434", # uncomment to set the Ollama server URL explicitly |
16 | 16 | }, |
17 | 17 |
|
18 | 18 | "verbose": True, |
19 | | - "headless": False |
| 19 | + "headless": False, |
| 20 | + "filter_config": { |
| 21 | + "diff_domain_filter": True, |
| 22 | + # "img_exts": ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp', '.ico'], |
| 23 | + # "lang_indicators": ['lang=', '/fr', '/pt', '/es', '/de', '/jp', '/it'], |
| 24 | + # "irrelevant_keywords": [ |
| 25 | + # '/login', '/signup', '/register', '/contact', 'facebook.com', 'twitter.com', |
| 26 | + # 'linkedin.com', 'instagram.com', '.js', '.css', '/wp-content/', '/wp-admin/', |
| 27 | + # '/wp-includes/', '/wp-json/', '/wp-comments-post.php', ';amp', '/about', |
| 28 | + # '/careers', '/jobs', '/privacy', '/terms', '/legal', '/faq', '/help', |
| 29 | + # '.pdf', '.zip', '/news', '/files', '/downloads' |
| 30 | + # ] |
| 31 | + }, |
20 | 32 | } |
21 | 33 |
|
22 | 34 | # ************************************************ |
|
0 commit comments