Skip to content

Commit 6647dac

Browse files
committed
add lite image without Chrome
1 parent 62ca030 commit 6647dac

File tree

9 files changed

+120
-36
lines changed

9 files changed

+120
-36
lines changed

.dockerignore

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Git
2+
.git
3+
.gitignore
4+
.gitattributes
5+
6+
7+
# CI
8+
.codeclimate.yml
9+
.travis.yml
10+
.taskcluster.yml
11+
12+
# Docker
13+
docker-compose.yml
14+
docker-compose.yaml
15+
Dockerfile
16+
.docker
17+
.dockerignore
18+
19+
# Byte-compiled / optimized / DLL files
20+
**/__pycache__/
21+
**/*.py[cod]
22+
23+
# C extensions
24+
*.so
25+
26+
# Distribution / packaging
27+
.Python
28+
env/
29+
build/
30+
develop-eggs/
31+
dist/
32+
downloads/
33+
eggs/
34+
lib/
35+
lib64/
36+
parts/
37+
sdist/
38+
var/
39+
*.egg-info/
40+
.installed.cfg
41+
*.egg
42+
43+
# PyInstaller
44+
# Usually these files are written by a python script from a template
45+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
46+
*.manifest
47+
*.spec
48+
49+
# Installer logs
50+
pip-log.txt
51+
pip-delete-this-directory.txt
52+
53+
# Unit test / coverage reports
54+
htmlcov/
55+
.tox/
56+
.coverage
57+
.cache
58+
nosetests.xml
59+
coverage.xml
60+
61+
# Translations
62+
*.mo
63+
*.pot
64+
65+
# Django stuff:
66+
*.log
67+
68+
# Sphinx documentation
69+
docs/_build/
70+
71+
# PyBuilder
72+
target/
73+
74+
# Virtual environment
75+
.env
76+
.venv/
77+
venv/
78+
79+
# PyCharm
80+
.idea
81+
82+
# Python mode for VIM
83+
.ropeproject
84+
**/.ropeproject
85+
86+
# Vim swap files
87+
**/*.swp
88+
89+
# VS Code
90+
.vscode/

.github/workflows/docker-image.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,17 @@ jobs:
2020
name: Set up Docker Buildx
2121
uses: docker/setup-buildx-action@v3
2222
-
23-
name: Build and push
23+
name: Build and push default image
2424
uses: docker/build-push-action@v6
2525
with:
2626
push: true
27+
build-args: BUILD_TYPE=default
2728
tags: ${{ github.repository }}:latest
29+
30+
-
31+
name: Build and push lite image
32+
uses: docker/build-push-action@v6
33+
with:
34+
build-args: BUILD_TYPE=lite
35+
push: true
36+
tags: ${{ github.repository }}:lite-latest

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13

Dockerfile

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
22

3+
# Build type: "lite" skips the Chrome install below; any other value (default: "default") installs it
4+
ARG BUILD_TYPE=default
5+
36
LABEL maintainer="loorisr"
47
LABEL repository="https://github.com/loorisr/patchright-scrape-api"
5-
LABEL description="Simple scraping API based on patchright "
6-
LABEL date="2025-02-26"
8+
LABEL description="Simple scraping API based on patchright"
9+
LABEL date="2025-02-27"
710

811
# Install the project into `/app`
912
WORKDIR /app
@@ -26,33 +29,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
2629
# Place executables in the environment at the front of the path
2730
ENV PATH="/app/.venv/bin:$PATH"
2831

29-
# Install patchright
30-
RUN patchright install chrome
31-
32-
# Install Playwright dependencies. Uses less space than playwright install --with-deps chromium
33-
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
34-
libglib2.0-0 \
35-
libnss3 \
36-
libnspr4 \
37-
libatk1.0-0 \
38-
libatk-bridge2.0-0 \
39-
libcups2 \
40-
libdrm2 \
41-
libdbus-1-3 \
42-
libxcb1 \
43-
libxkbcommon0 \
44-
libx11-6 \
45-
libxcomposite1 \
46-
libxdamage1 \
47-
libxext6 \
48-
libxfixes3 \
49-
libxrandr2 \
50-
libgbm1 \
51-
libpango-1.0-0 \
52-
libcairo2 \
53-
libasound2 \
54-
libatspi2.0-0 \
55-
&& rm -rf /var/lib/apt/lists/*
32+
# Install Chrome via patchright (skipped when BUILD_TYPE is "lite")
33+
RUN if [ "$BUILD_TYPE" != "lite" ]; then \
34+
patchright install chrome; \
35+
fi
5636

5737
# Then, add the rest of the project source code and install it
5838
# Installing separately from its dependencies allows optimal layer caching

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Features:
1616
* return cleaned html and markdown
1717
* temporary or persistent context
1818
* can connect to remote browser via CDP
19-
* lightweight: 1.2 Go
19+
* lightweight: 1.21 GB / 334 MB without Chrome (for remote CDP connection)
2020

2121
Available on Docker hub: `docker pull loorisr/patchright-scrape-api:latest`
2222

app/app.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ async def lifespan(app: FastAPI):
173173
playwright = await async_playwright().start()
174174

175175
if REMOTE_CDP:
176+
print(f"Launching Chrome with remote connection to {REMOTE_CDP}")
176177
browser = await playwright.chromium.connect_over_cdp(f"wss://{REMOTE_CDP}")
177178
context = browser.contexts[0]
178179
else:

docker-compose.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
services:
22
patchright:
33
container_name: patchright-scrape-api
4-
build: .
4+
build:
5+
context: .
6+
args:
7+
BUILD_TYPE: "default" # or "lite" to build without Chrome
58
ports:
69
- "3000:3000"
710
environment:
811
- DOMAIN_BLOCKED_DOMAINS=
912
- DOMAIN_BLOCKLIST_URL=
10-
- DOMAIN_BLOCKLIST_PATH=blocklist.txt
13+
- DOMAIN_BLOCKLIST_PATH=
1114
- RESOURCES_BLOCKED=['image', 'stylesheet', 'media', 'font','other']
1215
- PROXY_SERVER=
1316
- PROXY_USERNAME=

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
2-
name = "playwright-scrape-api"
2+
name = "patchright-scrape-api"
33
version = "0.1.0"
4-
description = "Add your description here"
4+
description = "Simple scraping API based on patchright"
55
readme = "README.md"
66
requires-python = ">=3.13"
77
dependencies = [

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)