Skip to content

Commit 6a6843e

Browse files
committed
feat(tranco): add rank category
1 parent e13eed2 commit 6a6843e

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

src/functions/get_tranco.cpp

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,14 +110,34 @@ namespace duckdb
110110

111111
if (!file.good ())
112112
{
113-
LogMessage ("ERROR", "Tranco list not found. Download it first using `SELECT update_tranco(true);`");
113+
LogMessage ("ERROR", "Tranco list `" + temp_file + "` not found. Download it first using `SELECT update_tranco(true);`");
114114
}
115115

116116
// Parse the CSV data and insert into a table
117117
LogMessage ("INFO", "Inserting Tranco list into table");
118118

119119
Connection con (db);
120-
con.Query ("CREATE OR REPLACE TABLE tranco_list AS SELECT * FROM read_csv('" + temp_file + "', header=false, columns={'rank': 'INTEGER', 'domain': 'VARCHAR'})");
120+
string query = "CREATE OR REPLACE TABLE tranco_list AS"
121+
" SELECT rank,"
122+
" domain,"
123+
" CASE"
124+
" WHEN rank <= 1000 THEN 'top1k'"
125+
" WHEN rank <= 5000 THEN 'top5k'"
126+
" WHEN rank <= 10000 THEN 'top10k'"
127+
" WHEN rank <= 100000 THEN 'top100k'"
128+
" WHEN rank <= 500000 THEN 'top500k'"
129+
" WHEN rank <= 1000000 THEN 'top1m'"
130+
" WHEN rank <= 5000000 THEN 'top5m'"
131+
" ELSE 'other'"
132+
" END AS category"
133+
" FROM read_csv('" +
134+
temp_file + "', header = false, columns = { 'rank': 'INTEGER', 'domain': 'VARCHAR' })";
135+
auto result = con.Query (query);
136+
137+
if (result->HasError ())
138+
{
139+
LogMessage ("ERROR", result->GetError ());
140+
}
121141
}
122142

123143
// Function to update the Tranco list table

0 commit comments

Comments
 (0)