@@ -73,7 +73,7 @@ WHERE date = '${constants.currentMonth}' AND
73
73
${ constants . devRankFilter }
74
74
` ) . postOps ( ctx => `
75
75
CREATE TEMP TABLE technologies_cleaned AS (
76
- WITH wappalyzer AS (
76
+ WITH technologies AS (
77
77
SELECT DISTINCT
78
78
name AS technology,
79
79
categories
@@ -101,13 +101,13 @@ CREATE TEMP TABLE technologies_cleaned AS (
101
101
LEFT JOIN pages.categories AS category
102
102
WHERE
103
103
-- Technology is corrupted
104
- technology NOT IN (SELECT DISTINCT technology FROM wappalyzer ) OR
104
+ technology NOT IN (SELECT DISTINCT technology FROM technologies ) OR
105
105
-- Technology's category is corrupted
106
106
CONCAT(technology, category) NOT IN (
107
107
SELECT DISTINCT
108
108
CONCAT(technology, category)
109
- FROM wappalyzer
110
- LEFT JOIN wappalyzer .categories AS category
109
+ FROM technologies
110
+ INNER JOIN technologies .categories AS category
111
111
)
112
112
),
113
113
@@ -118,14 +118,14 @@ CREATE TEMP TABLE technologies_cleaned AS (
118
118
page,
119
119
ARRAY_AGG(STRUCT(
120
120
pages.technology,
121
- wappalyzer .categories,
121
+ technologies .categories,
122
122
pages.info
123
123
)) AS technologies
124
124
FROM pages
125
125
INNER JOIN impacted_pages
126
126
USING (client, page)
127
- INNER JOIN wappalyzer
128
- ON pages. technology = wappalyzer.technology
127
+ INNER JOIN technologies
128
+ USING ( technology)
129
129
GROUP BY
130
130
client,
131
131
page
@@ -134,8 +134,10 @@ CREATE TEMP TABLE technologies_cleaned AS (
134
134
SELECT
135
135
client,
136
136
page,
137
- technologies
138
- FROM reconstructed_technologies
137
+ reconstructed_technologies.technologies
138
+ FROM impacted_pages
139
+ LEFT JOIN reconstructed_technologies
140
+ USING(client,page)
139
141
);
140
142
141
143
-- Update the crawl.pages table with the cleaned and restored technologies
0 commit comments