Skip to content

Commit fa0c270

Browse files
committed
add playwright basic script
1 parent e1d024f commit fa0c270

File tree

5 files changed

+118
-2
lines changed

5 files changed

+118
-2
lines changed

apps/13_langchain_toolkits/README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,18 @@ $ pip install -r requirements.txt
3333

3434
### Examples
3535

36-
#### [PlayWright Browser Toolkit](https://python.langchain.com/docs/integrations/tools/playwright/)
36+
#### Playwright
37+
38+
##### [Playwright for Python](https://playwright.dev/python/docs/intro)
39+
40+
- [Test generator](https://playwright.dev/docs/codegen)
41+
42+
```shell
43+
# Generate Python code with the Playwright test generator (codegen)
44+
$ npx playwright codegen --target=python
45+
```
46+
47+
##### [PlayWright Browser Toolkit](https://python.langchain.com/docs/integrations/tools/playwright/)
3748

3849
```shell
3950
$ poetry run playwright install
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import asyncio
2+
import logging
3+
import time
4+
5+
import typer
6+
from dotenv import load_dotenv
7+
from playwright.async_api import Page, async_playwright
8+
from playwright.sync_api import sync_playwright
9+
10+
# Module-level setup shared by every command in this script.
# NOTE(review): load_dotenv() presumably reads a local .env file into the
# process environment -- confirm against python-dotenv's documentation.
load_dotenv()
11+
# Logger named after this module; the commands raise its verbosity through
# logging.basicConfig when invoked with --verbose.
logger = logging.getLogger(__name__)
12+
# Typer application object that the @app.command() functions register on.
app = typer.Typer()
13+
14+
15+
async def scrape_task(page: Page, page_number: int):
    """Open one page of the VisasQ issue listing and save a screenshot of it.

    Args:
        page: Playwright page used for the navigation. The caller keeps
            ownership of it; this coroutine does not close it.
        page_number: Value substituted into the ``page`` query parameter of
            the listing URL and into the output file name
            (``screenshot_<page_number>.png``, written to the current
            working directory).

    Any exception raised while navigating or taking the screenshot is logged
    and swallowed, so a single failing page does not abort sibling tasks.
    """
    url = f"https://expert.visasq.com/issue/?keyword=&is_started_only=true&page={page_number}"
    print(f"Downloading {url}...")
    try:
        await page.goto(url)
        await page.wait_for_load_state()
        await page.screenshot(
            path=f"screenshot_{page_number}.png",
            full_page=True,
        )
    except Exception as e:
        # Best-effort scraping: report the failure and let the other pages
        # continue.
        logger.error("error: %s, url: %s", e, url)
    else:
        # Only announce success when the screenshot was actually written.
        print(f"Downloaded {url}")
28+
29+
30+
async def _get_visasq_cases():
    """Screenshot listing pages 1 through 19 concurrently in one browser.

    Launches Chromium, opens one page per listing page number inside a single
    browser context, and runs a ``scrape_task`` for each of them as an asyncio
    task. The context and the browser are closed even when launching a page
    or awaiting the tasks raises.
    """
    async with async_playwright() as p:
        logger.debug("Launch browser")

        browser = await p.chromium.launch()
        try:
            context = await browser.new_context()
            try:
                tasks = []
                for i in range(1, 20):
                    page = await context.new_page()
                    # Schedule the task right away so it can make progress
                    # while the remaining pages are still being opened.
                    tasks.append(
                        asyncio.create_task(
                            scrape_task(
                                page=page,
                                page_number=i,
                            )
                        )
                    )
                await asyncio.gather(*tasks)
            finally:
                await context.close()
        finally:
            await browser.close()
53+
54+
55+
@app.command()
def get_visasq_cases(
    verbose: bool = False,
) -> None:
    """Screenshot the VisasQ issue listing pages and print the elapsed time.

    Args:
        verbose: When true, configure the root logger at DEBUG level before
            scraping starts.
    """
    if verbose:
        logging.basicConfig(level=logging.DEBUG)

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    asyncio.run(_get_visasq_cases())
    elapsed_time = time.perf_counter() - start
    print(f"経過時間:{elapsed_time:.2f} 秒")
67+
68+
69+
@app.command()
def get_yahoo_realtime_trends(
    verbose: bool = False,
) -> None:
    """Print ranks 1-20 read from the Yahoo! Japan realtime search page.

    For each rank the text content of the link whose accessible name is
    exactly that rank number is collected, and the resulting list of
    ``{"rank": int, "keyword": str | None}`` dictionaries is printed.

    Args:
        verbose: When true, configure the root logger at DEBUG level before
            the browser is launched.
    """
    if verbose:
        logging.basicConfig(level=logging.DEBUG)

    with sync_playwright() as p:
        logger.debug("Launch browser")
        browser = p.chromium.launch()
        try:
            context = browser.new_context()
            try:
                page = context.new_page()
                logger.debug("Go to Yahoo Realtime Trends")
                page.goto(url="https://search.yahoo.co.jp/realtime")
                trends = [
                    {
                        "rank": rank,
                        "keyword": page.get_by_role(
                            "link", name=f"{rank}", exact=True
                        ).text_content(),
                    }
                    for rank in range(1, 21)
                ]
            finally:
                # Release the context even if navigation or a lookup failed.
                context.close()
        finally:
            browser.close()

    print(trends)
97+
98+
99+
# Run the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
100+
app()

poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ langgraph-checkpoint-sqlite = "^1.0.4"
3535
playwright = "^1.47.0"
3636
lxml = "^5.3.0"
3737
nest-asyncio = "^1.6.0"
38+
typer = "^0.12.5"
3839

3940
[tool.poetry.group.dev.dependencies]
4041
pre-commit = "^3.8.0"

requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ langgraph==0.2.23
1818
langchain-chroma==0.1.4
1919
beautifulsoup4==4.12.3
2020
langgraph-checkpoint-sqlite==1.0.4
21+
playwright==1.47.0
22+
lxml==5.3.0
23+
nest-asyncio==1.6.0
24+
typer==0.12.5
2125

2226
# To run 99_streamlit_examples/pages/10_Object_Detection.py
2327
# ultralytics==8.2.89

0 commit comments

Comments
 (0)