
Commit ece20cd

Upgrade to v2, and a bunch of improvements (#4)
* feat: upgraded to v2 functions
* feat: allow non-(default) firestore databases
* feat: added location parameter
* feat: improved validation
* fix: removed deprecated xmldom dependency, replaced with @xmldom/xmldom
* chore: audit dependencies
* chore: removed unnecessary packages
* chore: updated POSTINSTALL.md
* refactor: improved code
1 parent 0577b19 commit ece20cd

22 files changed: +1407 −1649 lines

.gitignore

Lines changed: 16 additions & 1 deletion
@@ -1,2 +1,17 @@
 .gcloudignore
-!emulator-params.env
+!emulator-params.env
+
+# Any files that are private
+project.private/
+
+# Environment variables
+*.env
+*.env.development.local
+*.env.test.local
+*.env.production.local
+*.env.development
+*.env.test
+*.env.production
+
+# Required for emulator
+_emulator/extensions/firestore-web-scraper.env.local

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
@@ -1,3 +1,14 @@
+## Version 0.2.0
+
+- feat: upgraded to v2 functions
+- feat: allow non-(default) firestore databases
+- feat: added location parameter
+- feat: improved validation
+- fix: removed deprecated `xmldom` dependency, replaced with `@xmldom/xmldom`
+- chore: audit dependencies
+- chore: removed unnecessary packages
+- chore: updated **POSTINSTALL.md**
+
 ## Version 0.1.0
 
 Initial release of the extension.
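
The move from `xmldom` to `@xmldom/xmldom` noted in this changelog is essentially a drop-in swap: the maintained `@xmldom/xmldom` fork exposes the same `DOMParser` API as the deprecated `xmldom` package, so the change is mostly an import rename. A minimal illustrative sketch (not taken from the extension's source):

```ts
// Illustrative only: swap the deprecated package for its maintained fork.
// Before: import { DOMParser } from "xmldom";
import { DOMParser } from "@xmldom/xmldom";

const doc = new DOMParser().parseFromString("<p>Hello</p>", "text/xml");
console.log(doc.documentElement?.textContent); // "Hello"
```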

POSTINSTALL.md

Lines changed: 136 additions & 48 deletions
@@ -2,58 +2,145 @@
 
 After installing the extension, follow this guide to configure scraping tasks and manage extracted data. Below you'll find detailed instructions, document structures, and examples.
 
+## Setting Up a Task
+Create a document in your tasks collection **`${param:SCRAPE_COLLECTION}`** to define a scraping task.
+
+---
+
+### Task Document Structure
+
+#### Required Fields:
+- **url** (string): Target URL to scrape (e.g., `"https://example.com"`)
+- **queries** (array of objects): List of queries to extract data from the HTML content
+
 ---
 
-## **Setting Up a Task**
-Create a document in your tasks collection **`${param:SCRAPE_COLLECTION}`** to define a scraping task.
-
-### **Task Document Structure**
-| Field | Type | Description |
-|-------------|------------------|-----------------------------------------------------------------------------|
-| `url` | string | **Required.** Target URL to scrape (e.g., `"https://example.com"`). |
-| `queries` | array of objects | **Required.** List of queries to extract data from the HTML content. |
-
-### **1. `queries` Configuration**
-Each query in the `queries` array narrows down elements from the HTML. Queries execute **in sequence**, with each subsequent query applied to the results of the previous one.
-
-#### **1.1. Query Object**
-| Field | Type | Description |
-|----------|--------|-----------------------------------------------------------------------------------------------|
-| `id` | string | **Required.** Unique identifier for the query. |
-| `type` | string | **Required.** Selector type. Supported values: `id`, `class`, `tag`, `attribute`, `text`, `xpath`. |
-| `value` | string | **Required.** Value for the selector (see examples below). |
-| `target` | string (optional) | What to extract from the selected elements. Supported values: `html`, `text`, `attribute`. `html` is set by default |
-| `attr` | string (optional) | Attribute name to extract when `target` is set to `attribute`. |
-
-#### **1.2. Examples by Query Type**
-| Type | `value` Example | Description |
-|--------------|-------------------------------|--------------------------------------------------|
-| **`id`** | `"header"` | Select element with ID `#header`. |
-| **`class`** | `"menu-item"` | Select elements with class `.menu-item`. |
-| **`tag`** | `"a"` | Select all `<a>` tags. |
-| **`attribute`** | `"href"` or `"[data-role='button']"` | Select elements with the `href` attribute or matching `data-role="button"`. |
-| **`xpath`** | `"//div[@class='content']"` | Select elements using an XPath expression. |
-| **`selector`** | `"#header > h1"` | Select elements using a CSS selector. |
-
-#### **1.3. Examples By Target Type**
-| Target | Description |
-|-----------------|-----------------------------------------------------------------------------------------------|
-| **`html`** | Extracts the HTML content of the selected elements. |
-| **`inner`** | Extracts the inner HTML content of the selected elements. |
-| **`text`** | Extracts the text content of the selected elements. |
-| **`attribute`** | Extracts the value of the specified attribute from the selected elements. |
-
-
-### **Example Task Document (Before Processing)**
+### Query Configuration
+Each query in the `queries` array narrows down specific elements from the HTML. Multiple queries can be used to extract different types of data from the same HTML.
+
+#### Query Object Fields:
+- **id** (string, required): Unique identifier for the query. Will be used as the key in the output `data` object.
+- **type** (string, required): Selector type. Supported values:
+  - `id`: Select by element ID
+  - `class`: Select by CSS class
+  - `tag`: Select by HTML tag
+  - `attribute`: Select by attribute
+  - `text`: Select by text content
+  - `selector`: Select using CSS selector
+- **value** (string, required): Value for the selector
+- **target** (string, optional): What to extract from selected elements
+  - `html`: Extract HTML content (default)
+  - `text`: Extract text content
+  - `attribute`: Extract attribute value
+- **attr** (string, optional): Attribute name to extract when `target` is set to `attribute`. Only allowed when `type` is `attribute`.
+
+---
+
+### Query Type Examples
+
+#### ID Selector
+```json
+{
+  "id": "header",
+  "type": "id",
+  "value": "header"
+}
+```
+Selects element with ID `#header`
+
+#### Class Selector
+```json
+{
+  "id": "menu",
+  "type": "class",
+  "value": "menu-item"
+}
+```
+Selects elements with class `.menu-item`
+
+#### Tag Selector
+```json
+{
+  "id": "links",
+  "type": "tag",
+  "value": "a"
+}
+```
+Selects all `<a>` tags
+
+#### Attribute Selector
+```json
+{
+  "id": "buttons",
+  "type": "attribute",
+  "value": "data-role",
+  "target": "attribute",
+  "attr": "data-role"
+}
+```
+Selects elements with matching attribute
+
+#### CSS Selector
+```json
+{
+  "id": "content",
+  "type": "selector",
+  "value": "div.content"
+}
+```
+Selects elements using CSS selector
+
+---
+
+### Target Type Examples
+
+#### HTML Target
+```json
+{
+  "id": "content",
+  "type": "class",
+  "value": "content",
+  "target": "html"
+}
+```
+Extracts the HTML content of selected elements
+
+#### Text Target
+```json
+{
+  "id": "title",
+  "type": "tag",
+  "value": "h1",
+  "target": "text"
+}
+```
+Extracts the text content of selected elements
+
+#### Attribute Target
+```json
+{
+  "id": "links",
+  "type": "tag",
+  "value": "a",
+  "target": "attribute",
+  "attr": "href"
+}
+```
+Extracts the value of specified attribute from selected elements
+
+---
+
+### Complete Example
+
+#### Task Document (Before Processing)
 ```json
 {
   "url": "https://example.com",
   "queries": [
     {
       "id": "title",
-      "type": "xpath",
-      "value": "//title",
-      "target": "text"
+      "type": "tag",
+      "value": "h1"
     },
     {
       "id": "description",
@@ -71,16 +158,17 @@
 }
 ```
 
-### **Example Data Document (After Processing)**
+Extracts the text content of the `<h1>` tag, the text content of the element with class `description`, and the value of the `href` attribute from all `<a>` tags.
+
+#### Result Document (After Processing)
 ```json
 {
   "url": "https://example.com",
   "queries": [
     {
       "id": "title",
-      "type": "xpath",
-      "value": "//title",
-      "target": "text"
+      "type": "tag",
+      "value": "h1"
     },
     {
       "id": "description",

_emulator/extensions/firestore-send-email.env.local

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+LOCATION=us
+DATABASE=(default)
+SCRAPE_COLLECTION=tasks

_emulator/firebase.json

Lines changed: 0 additions & 23 deletions
@@ -2,39 +2,16 @@
   "extensions": {
     "firestore-web-scraper": "../"
   },
-  "storage": {
-    "rules": "storage.rules"
-  },
   "emulators": {
-    "hub": {
-      "port": 4000
-    },
     "storage": {
       "port": 9199
     },
-    "auth": {
-      "port": 9099
-    },
-    "pubsub": {
-      "port": 8085
-    },
-    "functions": {
-      "port": 5001
-    },
     "ui": {
       "enabled": true
     },
     "firestore": {
       "host": "127.0.0.1",
       "port": 8080
     }
-  },
-  "functions": {
-    "port": 5002,
-    "source": "functions"
-  },
-  "firestore": {
-    "rules": "firestore.rules",
-    "indexes": "firestore.indexes.json"
   }
 }

_emulator/firestore.rules

Lines changed: 0 additions & 9 deletions
@@ -2,15 +2,6 @@ rules_version = '2';
 service cloud.firestore {
   match /databases/{database}/documents {
     match /{document=**} {
-      // This rule allows anyone with your database reference to view, edit,
-      // and delete all data in your database. It is useful for getting
-      // started, but it is configured to expire after 30 days because it
-      // leaves your app open to attackers. At that time, all client
-      // requests to your database will be denied.
-      //
-      // Make sure to write security rules for your app before that time, or
-      // else all client requests to your database will be denied until you
-      // update your rules.
       allow read, write;
     }
   }

extension.yaml

Lines changed: 27 additions & 5 deletions
@@ -1,5 +1,5 @@
 name: firestore-web-scraper
-version: 0.1.0
+version: 0.2.0
 specVersion: v1beta
 
 displayName: Web Scrape with Firestore
@@ -23,17 +23,39 @@ roles:
 
 resources:
   - name: processQueue
-    type: firebaseextensions.v1beta.function
+    type: firebaseextensions.v1beta.v2function
     description:
       Processes document changes in the specified Cloud Firestore collection,
       creating and performing web scraping tasks.
     properties:
-      runtime: nodejs20
+      sourceDirectory: functions
+      buildConfig:
+        runtime: nodejs22
       eventTrigger:
-        eventType: providers/cloud.firestore/eventTypes/document.create
-        resource: projects/${param:PROJECT_ID}/databases/(default)/documents/${param:SCRAPE_COLLECTION}/{id}
+        eventType: google.cloud.firestore.document.v1.created
+        triggerRegion: ${param:LOCATION}
+        eventFilters:
+          - attribute: database
+            value: ${param:DATABASE}
+          - attribute: document
+            value: ${param:SCRAPE_COLLECTION}/{documentId}
+            operator: match-path-pattern
 
 params:
+  - param: LOCATION
+    label: Location
+    description: The location of the Cloud Firestore database.
+    type: string
+    default: us-central1
+    required: true
+
+  - param: DATABASE
+    label: Database
+    description: The Firestore database to use. "(default)" is used for the default database.
+    type: string
+    default: (default)
+    required: true
+
   - param: SCRAPE_COLLECTION
     label: Scrape documents collection
     description: >-
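
The new `eventTrigger` above maps onto the 2nd-gen Firestore triggers of the `firebase-functions` SDK. As a rough sketch only (the extension's actual `processQueue` source is not part of this diff), the equivalent declaration would look roughly like this, with illustrative values in place of the `${param:...}` placeholders:

```ts
// Rough sketch, not the extension's source: a 2nd-gen Firestore trigger that
// matches the eventTrigger configured above (document created in the scrape
// collection of the chosen database, in the configured trigger region).
import { onDocumentCreated } from "firebase-functions/v2/firestore";

export const processQueue = onDocumentCreated(
  {
    document: "tasks/{documentId}", // ${param:SCRAPE_COLLECTION}/{documentId}
    database: "(default)",          // ${param:DATABASE}
    region: "us-central1",          // ${param:LOCATION}, the trigger region
  },
  async (event) => {
    const task = event.data?.data(); // the newly created task document
    if (!task || typeof task.url !== "string") return; // placeholder validation
    // ...fetch task.url, run the configured queries, write results back...
  },
);
```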
