Skip to content

Commit 305e1f2

Browse files
committed
Adding dataset images
1 parent 0369446 commit 305e1f2

File tree

9 files changed

+381
-2
lines changed

9 files changed

+381
-2
lines changed

.dockerignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ LICENSE
2222
README.md
2323
tox.ini
2424
*.secret.env
25+
./data/
26+
node_modules/

Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
FROM frictionlessdata/datapackage-pipelines:2.1.8
22

3-
RUN apk --update --no-cache add bash wget
3+
RUN apk --update --no-cache add bash wget nodejs npm nss chromium
4+
RUN npm install -g npm@latest
5+
RUN cd /pipelines/ && PUPPETEER_SKIP_CHROMIUM_DOWNLOAD="true" npm install puppeteer
46

57
COPY docker-dpp-run.sh /dpp/docker/run.sh
68

@@ -12,6 +14,7 @@ COPY setup.py /pipelines/
1214
RUN python3 -m pip install -e .
1315

1416
ENV DPP_ELASTICSEARCH=localhost:19200
17+
ENV CHROME_BIN="/usr/bin/chromium-browser"
1518

1619
COPY datapackage_pipelines_migdar /pipelines/datapackage_pipelines_migdar
1720
COPY download_search_results_unique_records.sh /pipelines/
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import os
2+
import subprocess
3+
4+
import dataflows as DF
5+
6+
# Path to the Node.js script (node/screenshot.js, next to this module) that do_screenshot() runs.
SCREENSHOT = os.path.join(os.path.dirname(__file__), 'node', 'screenshot.js')
7+
8+
9+
def do_screenshot():
    """Build a row processor that renders each row's card page to a PNG file.

    The returned function reads ``row['doc_id']``, and runs the Node script
    at ``SCREENSHOT`` against ``https://yodaat.org/card/<doc_id>`` with the
    ``.card`` selector, writing the image to ``data/<doc_id>.png``.

    Screenshotting is best-effort: a non-zero exit status from the Node
    script is logged as a warning instead of being silently discarded, and
    processing continues with the next row.

    Returns:
        A function taking a single ``row`` mapping. It returns nothing and
        does not modify the row.
    """
    import logging

    def func(row):
        doc_id = row['doc_id']
        url = f'https://yodaat.org/card/{doc_id}'

        # The dirname() call implies doc_id may carry directory components,
        # so make sure the matching directory under data/ exists first.
        outdir = os.path.join('data', os.path.dirname(doc_id))
        os.makedirs(outdir, exist_ok=True)
        outpath = os.path.join('data', doc_id + '.png')

        returncode = subprocess.call(['node', SCREENSHOT, url, outpath, '.card'])
        if returncode != 0:
            # Surface the failure but keep going: one bad card should not
            # abort the whole run.
            logging.warning(
                'Screenshot of %s failed (node exited with status %s)',
                url, returncode,
            )

    return func
18+
19+
20+
def flow(*_, path='data/datasets_in_es'):
    """Assemble the screenshot flow.

    Args:
        *_: Ignored positional arguments.
        path: Base location (directory or URL) that contains the
            ``datapackage.json`` to load.

    Returns:
        A ``DF.Flow`` that loads the datapackage, runs the screenshot
        processor built by ``do_screenshot()``, and then sets
        ``dpp:streaming`` to True on the resource addressed by index -1.
    """
    datapackage_location = f'{path}/datapackage.json'
    steps = (
        DF.load(datapackage_location),
        do_screenshot(),
        # The key contains a colon, so it has to be passed via a dict splat.
        DF.update_resource(-1, **{'dpp:streaming': True}),
    )
    return DF.Flow(*steps)
26+
27+
28+
if __name__ == '__main__':
    # When run as a script, process the datapackage served from api.yodaat.org.
    screenshot_flow = flow(path='https://api.yodaat.org/data/datasets_in_es')
    screenshot_flow.process()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
node_modules/
29.6 KB
Loading

datapackage_pipelines_migdar/flows/node/package-lock.json

Lines changed: 315 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
const puppeteer = require('puppeteer');
2+
3+
/**
 * Command-line entry point: `node screenshot.js <url> <filename> <selector>`.
 *
 * Opens `url` in headless Chromium, waits until an `svg` element exists
 * inside the element matched by `selector`, and writes a PNG of that
 * element's bounding box to `filename`.
 *
 * The browser is always closed, even when navigation or the screenshot
 * fails, so no Chromium process is left behind. Any error is printed to
 * stderr and the process exits with a non-zero status.
 */
(async () => {
  const [url, filename, selector] = process.argv.slice(2);
  if (!url || !filename || !selector) {
    throw new Error('Usage: node screenshot.js <url> <filename> <selector>');
  }

  const browser = await puppeteer.launch({
    headless: true,
    // Use the Chromium named by CHROME_BIN when set; otherwise fall back
    // to puppeteer's default executable.
    executablePath: process.env.CHROME_BIN || null,
    args: ['--no-sandbox', '--headless', '--disable-gpu', '--disable-dev-shm-usage'],
  });

  try {
    const page = await browser.newPage();
    // setViewport returns a Promise; await it so the viewport is applied
    // before the page is loaded.
    await page.setViewport({width: 1300, height: 1200});
    await page.goto(url);
    await page.waitForSelector(`${selector} svg`);

    // Measure the target element inside the page so the screenshot can be
    // clipped to exactly that element.
    const rect = await page.evaluate((sel) => {
      const element = document.querySelector(sel);
      const {x, y, width, height} = element.getBoundingClientRect();
      return {x, y, width, height};
    }, selector);

    await page.screenshot({path: filename, clip: rect});
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)