Skip to content

Commit 6d0a616

Browse files
committed
adding description on technologies collection
1 parent 0e0de6e commit 6d0a616

File tree

4 files changed

+3460
-13
lines changed

4 files changed

+3460
-13
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import os
2+
import json
3+
4+
##################################################################################################
5+
#
6+
# This script reads all technology descriptions from the wappalyzer/src/technologies directory
7+
# and creates a JSON file with the keys and descriptions
8+
#
9+
##################################################################################################
10+
11+
# Default input directory and output file; both can be overridden via main().
directory_path = "../../wappalyzer/src/technologies"
output_file_path = "key_description.json"


def _collect_descriptions(source_dir):
    """Return a ``{technology name: description}`` dict read from *source_dir*.

    Every ``*.json`` file directly inside *source_dir* is parsed, and each
    top-level entry that is a JSON object with a ``description`` field
    contributes one item to the result.
    """
    descriptions = {}

    # os.listdir() returns names in arbitrary order; sorting makes the output
    # (and the winner when a key appears in several files) reproducible.
    for filename in sorted(os.listdir(source_dir)):
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(source_dir, filename)
        with open(file_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        for key, value in data.items():
            # Only JSON objects can carry a description. Without this check a
            # plain string containing the word "description" would pass the
            # ``in`` test and then fail on the subscript.
            if isinstance(value, dict) and 'description' in value:
                descriptions[key] = value['description']

    return descriptions


def main(source_dir=directory_path, destination=output_file_path):
    """Collect all technology descriptions and write them to *destination*.

    Args:
        source_dir: Directory holding the technology ``*.json`` files.
        destination: Path of the JSON file to create or overwrite.
    """
    key_description_dict = _collect_descriptions(source_dir)

    # Save the keys and descriptions in the output JSON file
    with open(destination, 'w', encoding='utf-8') as output_file:
        json.dump(key_description_dict, output_file, indent=4)

    print(f"Keys and descriptions saved to {destination}")


if __name__ == "__main__":
    main()
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import os
2+
import json
3+
from google.cloud import firestore
4+
5+
# Create the Firestore client.
# No project ID is passed, so the client has to resolve the project and
# credentials on its own (presumably from the environment -- confirm before
# running against a real project).
db = firestore.Client()

# JSON file holding the {technology name: description} mapping
json_file_path = "key_description.json"

# Load the whole mapping into memory
with open(json_file_path, encoding='utf-8') as source:
    data = json.load(source)

# Collection that receives one document per technology
collection_ref = db.collection(u'technologies-list')

# document() is called without an ID, so every run adds new documents
# rather than updating documents written by a previous run.
for name, text in data.items():
    new_document = collection_ref.document()
    new_document.set({'name': name, 'description': text})

print("Data has been saved to Firestore.")

scripts/script_technologies.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
################################################################################
2+
# Description: This script queries the BigQuery table "technologies"
3+
# and inserts the result into
4+
# the Firestore collection "technologies".
5+
#
6+
# Parameters: start_date (optional), end_date (optional)
7+
# Usage example: python scripts/script_technologies.py 2020-01-01 2020-12-31
8+
################################################################################
9+
110
import sys
211
from google.cloud import bigquery
312
from google.cloud import firestore
@@ -16,16 +25,6 @@ def execute_query_and_insert_result(start_date, end_date):
1625
# Set up Firestore client
1726
firestore_client = firestore.Client()
1827

19-
# Define the BigQuery query with optional parameters
20-
# query = """
21-
# SELECT
22-
# *
23-
# FROM
24-
# `httparchive.core_web_vitals.technologies`
25-
# WHERE
26-
# 1=1
27-
# """
28-
2928
query = """
3029
SELECT
3130
date,
@@ -44,7 +43,6 @@ def execute_query_and_insert_result(start_date, end_date):
4443
geo = 'ALL' AND
4544
rank = 'ALL'
4645
"""
47-
4846
# Construct the WHERE clause based on the provided parameters
4947
if start_date and end_date:
5048
query += f" AND date >= '{start_date}' AND date <= '{end_date}'"
@@ -56,12 +54,26 @@ def execute_query_and_insert_result(start_date, end_date):
5654
results = query_job.result()
5755

5856
# Create a new Firestore document for each result and insert it into the "technologies" collection
59-
collection_ref = firestore_client.collection('technologies')
60-
print(results)
57+
collection_ref = firestore_client.collection(u'technologies')
58+
59+
tech_collection_ref = firestore_client.collection(u'technologies-list')
60+
6161
for row in results:
6262

6363
item = dict(row.items())
64+
65+
# Query the technologies-list collection for the description
66+
#
67+
tech_query = tech_collection_ref.where('name', '==', row['technology'])
68+
tech_query = tech_query.limit(1)
69+
tech_results = tech_query.stream()
70+
technology = {}
71+
for tech in tech_results:
72+
technology = tech.to_dict()
73+
74+
# overriding BQ fields
6475
item['date'] = str(row['date'])
76+
item['description'] = technology.get('description','')
6577

6678
print(item)
6779

0 commit comments

Comments
 (0)