@@ -31,11 +31,17 @@ anyio==4.3.0
3131 # via openai
3232 # via starlette
3333 # via watchfiles
34+ astroid==3.2.2
35+ # via pylint
36+ async-timeout==4.0.3
37+ # via aiohttp
38+ # via langchain
3439attrs==23.2.0
3540 # via aiohttp
3641 # via jsonschema
3742 # via referencing
3843babel==2.15.0
44+ # via courlan
3945 # via sphinx
4046beautifulsoup4==4.12.3
4147 # via furo
@@ -57,22 +63,31 @@ certifi==2024.2.2
5763 # via httpcore
5864 # via httpx
5965 # via requests
66+ # via trafilatura
6067charset-normalizer==3.3.2
68+ # via htmldate
6169 # via requests
70+ # via trafilatura
6271click==8.1.7
6372 # via burr
6473 # via streamlit
6574 # via typer
6675 # via uvicorn
6776contourpy==1.2.1
6877 # via matplotlib
78+ courlan==1.2.0
79+ # via trafilatura
6980cycler==0.12.1
7081 # via matplotlib
7182dataclasses-json==0.6.6
7283 # via langchain
7384 # via langchain-community
85+ dateparser==1.2.0
86+ # via htmldate
7487defusedxml==0.7.1
7588 # via langchain-anthropic
89+ dill==0.3.8
90+ # via pylint
7691distro==1.9.0
7792 # via anthropic
7893 # via groq
@@ -83,6 +98,9 @@ docutils==0.19
8398 # via sphinx
8499email-validator==2.1.1
85100 # via fastapi
101+ exceptiongroup==1.2.1
102+ # via anyio
103+ # via pytest
86104faiss-cpu==1.8.0
87105 # via scrapegraphai
88106fastapi==0.111.0
@@ -139,6 +157,7 @@ graphviz==0.20.3
139157 # via scrapegraphai
140158greenlet==3.0.3
141159 # via playwright
160+ # via sqlalchemy
142161groq==0.8.0
143162 # via langchain-groq
144163grpcio==1.64.0
@@ -151,6 +170,8 @@ h11==0.14.0
151170 # via uvicorn
152171html2text==2024.2.26
153172 # via scrapegraphai
173+ htmldate==1.8.1
174+ # via trafilatura
154175httpcore==1.0.5
155176 # via httpx
156177httplib2==0.22.0
@@ -176,8 +197,14 @@ idna==3.7
176197 # via yarl
177198imagesize==1.4.1
178199 # via sphinx
200+ importlib-metadata==8.0.0
201+ # via sphinx
202+ importlib-resources==6.4.0
203+ # via matplotlib
179204iniconfig==2.0.0
180205 # via pytest
206+ isort==5.13.2
207+ # via pylint
181208jinja2==3.1.4
182209 # via altair
183210 # via burr
@@ -198,6 +225,8 @@ jsonschema==4.22.0
198225 # via altair
199226jsonschema-specifications==2023.12.1
200227 # via jsonschema
228+ justext==3.0.1
229+ # via trafilatura
201230kiwisolver==1.4.5
202231 # via matplotlib
203232langchain==0.1.15
@@ -236,6 +265,12 @@ loguru==0.7.2
236265 # via burr
237266lxml==5.2.2
238267 # via free-proxy
268+ # via htmldate
269+ # via justext
270+ # via lxml-html-clean
271+ # via trafilatura
272+ lxml-html-clean==0.1.1
273+ # via lxml
239274markdown-it-py==3.0.0
240275 # via rich
241276markupsafe==2.1.5
@@ -244,6 +279,8 @@ marshmallow==3.21.2
244279 # via dataclasses-json
245280matplotlib==3.9.0
246281 # via burr
282+ mccabe==0.7.0
283+ # via pylint
247284mdurl==0.1.2
248285 # via markdown-it-py
249286minify-html==0.15.0
@@ -291,6 +328,8 @@ pillow==10.3.0
291328 # via fireworks-ai
292329 # via matplotlib
293330 # via streamlit
331+ platformdirs==4.2.2
332+ # via pylint
294333playwright==1.43.0
295334 # via scrapegraphai
296335 # via undetected-playwright
@@ -336,6 +375,7 @@ pygments==2.18.0
336375 # via furo
337376 # via rich
338377 # via sphinx
378+ pylint==3.2.5
339379pyparsing==3.1.2
340380 # via httplib2
341381 # via matplotlib
@@ -344,6 +384,8 @@ pytest==8.0.0
344384pytest-mock==3.14.0
345385python-dateutil==2.9.0.post0
346386 # via botocore
387+ # via dateparser
388+ # via htmldate
347389 # via matplotlib
348390 # via pandas
349391python-dotenv==1.0.1
@@ -352,6 +394,7 @@ python-dotenv==1.0.1
352394python-multipart==0.0.9
353395 # via fastapi
354396pytz==2024.1
397+ # via dateparser
355398 # via pandas
356399pyyaml==6.0.1
357400 # via huggingface-hub
@@ -363,6 +406,7 @@ referencing==0.35.1
363406 # via jsonschema
364407 # via jsonschema-specifications
365408regex==2024.5.15
409+ # via dateparser
366410 # via tiktoken
367411requests==2.32.2
368412 # via burr
@@ -439,10 +483,17 @@ tenacity==8.3.0
439483tiktoken==0.6.0
440484 # via langchain-openai
441485 # via scrapegraphai
486+ tld==0.13
487+ # via courlan
442488tokenizers==0.19.1
443489 # via anthropic
444490toml==0.10.2
445491 # via streamlit
492+ tomli==2.0.1
493+ # via pylint
494+ # via pytest
495+ tomlkit==0.12.5
496+ # via pylint
446497toolz==0.12.1
447498 # via altair
448499tornado==6.4
@@ -453,10 +504,15 @@ tqdm==4.66.4
453504 # via openai
454505 # via scrapegraphai
455506 # via semchunk
507+ trafilatura==1.10.0
508+ # via scrapegraphai
456509typer==0.12.3
457510 # via fastapi-cli
458511typing-extensions==4.12.0
512+ # via altair
459513 # via anthropic
514+ # via anyio
515+ # via astroid
460516 # via fastapi
461517 # via fastapi-pagination
462518 # via google-generativeai
@@ -466,16 +522,21 @@ typing-extensions==4.12.0
466522 # via pydantic
467523 # via pydantic-core
468524 # via pyee
525+ # via pylint
469526 # via sf-hamilton
470527 # via sqlalchemy
528+ # via starlette
471529 # via streamlit
472530 # via typer
473531 # via typing-inspect
532+ # via uvicorn
474533typing-inspect==0.9.0
475534 # via dataclasses-json
476535 # via sf-hamilton
477536tzdata==2024.1
478537 # via pandas
538+ tzlocal==5.2
539+ # via dateparser
479540ujson==5.10.0
480541 # via fastapi
481542undetected-playwright==0.3.0
@@ -484,7 +545,10 @@ uritemplate==4.1.1
484545 # via google-api-python-client
485546urllib3==1.26.18
486547 # via botocore
548+ # via courlan
549+ # via htmldate
487550 # via requests
551+ # via trafilatura
488552uvicorn==0.29.0
489553 # via burr
490554 # via fastapi
@@ -496,3 +560,6 @@ websockets==12.0
496560 # via uvicorn
497561yarl==1.9.4
498562 # via aiohttp
563+ zipp==3.19.2
564+ # via importlib-metadata
565+ # via importlib-resources
0 commit comments