Merge pull request #7 from lightpanda-io/playwright

krichprollsch · web-flow · commit 64fb14b21811 · 2024-04-09T10:28:53.000+02:00
playwright: add playwright bench for chrome
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 /ws/ws
+/node_modules
diff --git a/README.md b/README.md
@@ -53,18 +53,18 @@ By default it exposes the `public` dir using the `1234` port.
 $ go run ws/main.go
 ```
 
-## Single request
-
-This bench is a very basic test to compare the two software.
-We start the browser and request the fake web page once with full JS execution. The final DOMTree is
-rendered in stdout.
-
 ### Test machine
 
 The tests are run in an AWS m5.large (x86_64) with a fresh Debian install.
 
 ![aws.m5 neofetch](./img/aws_m5_neofetch.png)
 
+## Single request
+
+This bench is a very basic test to compare the two software.
+We start the browser and request the fake web page once with full JS execution. The final DOMTree is
+rendered in stdout.
+
 We use Google Chrome version 122.0.6261.94.
 
 ```console
@@ -147,7 +147,46 @@ $ /usr/bin/time -v ./browsercore-get --dump http://127.0.0.1:1234/campfire-comme
         Exit status: 0
 ```
 
-## Multiple requests
+## Multiple requests using Playwright
+
+We now compare multiple page loads and JS evaluations using
+[Playwright](https://playwright.dev).
+
+### Dependencies
+
+To run the benchmark, you need to install [nodejs](https://nodejs.org/en/download).
+
+Once `nodejs` is installed, please run a `npm install` to install nodejs
+dependencies, mainly Playwright.
+
+You also have to install [Google Chrome](https://www.google.com/chrome/) and
+the Lightpanda browser, but its code is not publicly available yet.
+
+### Google Chrome benchmark
+
+We use Google Chrome version 123.0.6312.105.
+
+The `playwright/chrome.js` benchmark accepts multiple env vars to be configured.
+* `CHROME_PATH` is the path to your Google Chrome bin,
+* `BASE_URL` is the base url of the running web server to request, by default `http://127.0.0.1:1234`,
+* `RUNS` is the number of pages loaded by the benchmark, default is `100`.
+
+`npm run bench-chrome` starts a playwright process, loads a Google Chrome
+instance and loads the page to extract data 100 times.
+
+```console
+$ CHROME_PATH=`which google-chrome` npm run bench-chrome
+
+> demo@1.0.0 bench-chrome
+> node playwright/chrome.js
+
+................................................................................
+....................
+total runs 100
+total duration (ms) 18792
+avg run duration (ms) 184
+min run duration (ms) 168
+max run duration (ms) 323
+```
 
-We plan to create a benchmark to compare the memory used during multiple
-successive requests sent to a CDP server.
+![aws.m5 Playwright with Google Chrome](./img/aws_m5_playwright_chrome.png)
diff --git a/img/aws_m5_playwright_chrome.png b/img/aws_m5_playwright_chrome.png
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -0,0 +1,26 @@
+{
+  "type": "module",
+  "name": "demo",
+  "version": "1.0.0",
+  "description": "Lightpanda browser demo",
+  "main": "index.js",
+  "scripts": {
+    "install-chrome": "npx playwright install chrome",
+    "ws": "go run ws/main.go",
+    "bench-chrome": "node playwright/chrome.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/lightpanda-io/demo.git"
+  },
+  "keywords": [],
+  "author": "Lightpanda",
+  "license": "Apache 2",
+  "bugs": {
+    "url": "https://github.com/lightpanda-io/demo/issues"
+  },
+  "homepage": "https://lightpanda.io",
+  "dependencies": {
+    "playwright": "^1.42.1"
+  }
+}
diff --git a/playwright/chrome.js b/playwright/chrome.js
@@ -0,0 +1,113 @@
+// Import the Chromium browser into our scraper.
+import { chromium } from 'playwright';
+
+// options passed to the browser.
+let browser_options = {};
+
+// chrome browser path
+if (process.env.CHROME_PATH) {
+    browser_options.executablePath = process.env.CHROME_PATH;
+}
+
+// headless
+if (process.env.HEADLESS) {
+    browser_options.headless = process.env.HEADLESS === 'true';
+}
+
+// web serveur url
+const baseURL = process.env.BASE_URL ? process.env.BASE_URL : 'http://127.0.0.1:1234';
+
+// runs
+const runs = process.env.RUNS ? parseInt(process.env.RUNS) : 100;
+
+// measure general time.
+const gstart = process.hrtime.bigint();
+// store all run durations
+let metrics = [];
+
+// Open a Chromium browser. We use headless: false
+// to be able to watch the browser window.
+const browser = await chromium.launch(browser_options);
+
+for (var run = 1; run<=runs; run++) {
+
+    // measure run time.
+    const rstart = process.hrtime.bigint();
+
+    const context = await browser.newContext({
+        baseURL: baseURL,
+    });
+
+    const page = await context.newPage();
+    await page.goto('/campfire-commerce');
+
+    // ensure the price is loaded.
+    await page.waitForFunction(() => {
+        const price = document.querySelector('#product-price');
+        return price.textContent.length > 0;
+    });
+
+
+    // ensure the reviews are loaded.
+    await page.waitForFunction(() => {
+        const reviews = document.querySelectorAll('#product-reviews > div');
+        return reviews.length > 0;
+    });
+
+    let res = {};
+
+    res.name = await page.locator('#product-name').textContent();
+    res.price = parseFloat((await page.locator('#product-price').textContent()).substring(1));
+    res.description = await page.locator('#product-description').textContent();
+    res.features = await page.locator('#product-features > li').allTextContents();
+    res.image = await page.locator('#product-image').getAttribute('src');
+
+    let related = [];
+    var i = 0;
+    for (const row of await page.locator('#product-related > div').all()) {
+        related[i++] = {
+            name: await row.locator('h4').textContent(),
+            price: parseFloat((await row.locator('p').textContent()).substring(1)),
+            image: await row.locator('img').getAttribute('src'),
+        };
+    }
+    res.related = related;
+
+    let reviews = [];
+    var i =0;
+    for (const row of await page.locator('#product-reviews > div').all()) {
+        reviews[i++] = {
+            title: await row.locator('h4').textContent(),
+            text: await row.locator('p').textContent(),
+        };
+    }
+    res.reviews = reviews;
+
+    // console.log(res);
+
+    process.stderr.write('.');
+    if(run % 80 == 0) process.stderr.write('\n');
+
+    await page.close();
+    await context.close();
+
+    metrics[run] = process.hrtime.bigint() - rstart;
+}
+
+// Turn off the browser to clean up after ourselves.
+await browser.close();
+
+const gduration = process.hrtime.bigint() - gstart;
+
+process.stderr.write('\n');
+
+const avg = metrics.reduce((s, a) => s += a) / BigInt(metrics.length);
+const min = metrics.reduce((s, a) => a < s ? a : s);
+const max = metrics.reduce((s, a) => a > s ? a : s);
+
+console.log('total runs', runs);
+console.log('total duration (ms)', (gduration/1000000n).toString());
+console.log('avg run duration (ms)', (avg/1000000n).toString());
+console.log('min run duration (ms)', (min/1000000n).toString());
+console.log('max run duration (ms)', (max/1000000n).toString());
+