Skip to content

Commit ef75de7

Browse files
committed
chore: move Tranco functions to their own file
1 parent fa5c8ab commit ef75de7

File tree

2 files changed

+111
-110
lines changed

2 files changed

+111
-110
lines changed

src/functions/get_tranco.cpp

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,123 @@
11
#include "get_tranco.hpp"
22
#include "../utils/utils.hpp"
33

4+
#include <curl/curl.h>
5+
#include <fstream>
46
#include <regex>
57

68
namespace duckdb
79
{
810
namespace netquack
911
{
12+
// Function to get the download code for the Tranco list
13+
std::string GetTrancoDownloadCode(char *date)
14+
{
15+
CURL *curl;
16+
CURLcode res;
17+
std::string readBuffer;
18+
19+
// Construct the URL for the daily list
20+
std::string url = "https://tranco-list.eu/daily_list?date=" + std::string(date) + "&subdomains=true";
21+
22+
LogMessage("INFO", "Get Tranco download code for date: " + std::string(date));
23+
24+
curl = curl_easy_init();
25+
if (curl)
26+
{
27+
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
28+
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects
29+
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
30+
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
31+
res = curl_easy_perform(curl);
32+
curl_easy_cleanup(curl);
33+
34+
if (res != CURLE_OK)
35+
{
36+
throw std::runtime_error("Failed to fetch Tranco download code.");
37+
}
38+
}
39+
40+
// Extract the download code from the URL
41+
std::regex code_regex(R"(Information on the Tranco list with ID ([A-Z0-9]+))");
42+
std::smatch code_match;
43+
if (std::regex_search(readBuffer, code_match, code_regex) && code_match.size() > 1)
44+
{
45+
LogMessage("INFO", "Tranco download code: " + code_match[1].str());
46+
return code_match[1].str();
47+
}
48+
49+
throw std::runtime_error("Failed to extract Tranco download code.");
50+
}
51+
52+
// Function to download the Tranco list and create a table
53+
void LoadTrancoList(DatabaseInstance &db, bool force)
54+
{
55+
// Get yesterday's date in YYYY-MM-DD format
56+
std::time_t now = std::time(nullptr);
57+
std::tm *yesterday = std::localtime(&now);
58+
yesterday->tm_mday -= 1; // Subtract one day
59+
std::mktime(yesterday); // Normalize the time
60+
char date[11];
61+
std::strftime(date, sizeof(date), "%Y-%m-%d", yesterday);
62+
63+
// Construct the file name
64+
std::string temp_file = "tranco_list_" + std::string(date) + ".csv";
65+
66+
// Download the file if it doesn't exist or if force is true
67+
std::ifstream file(temp_file);
68+
if (force)
69+
{
70+
// Remove the old file if it exists
71+
if (file.good())
72+
{
73+
remove(temp_file.c_str());
74+
}
75+
// Get the download code
76+
std::string download_code = GetTrancoDownloadCode(date);
77+
78+
// Construct the download URL
79+
std::string download_url = "https://tranco-list.eu/download/" + download_code + "/full";
80+
81+
LogMessage("INFO", "Download Tranco list: " + download_url);
82+
83+
// Download the CSV file to a temporary file
84+
CURL *curl;
85+
CURLcode res;
86+
FILE *file = fopen(temp_file.c_str(), "wb");
87+
if (!file)
88+
{
89+
throw std::runtime_error("Failed to create temporary file for Tranco list.");
90+
}
91+
92+
curl = curl_easy_init();
93+
if (curl)
94+
{
95+
curl_easy_setopt(curl, CURLOPT_URL, download_url.c_str());
96+
curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
97+
res = curl_easy_perform(curl);
98+
curl_easy_cleanup(curl);
99+
fclose(file);
100+
101+
if (res != CURLE_OK)
102+
{
103+
remove(temp_file.c_str()); // Clean up the temporary file
104+
throw std::runtime_error("Failed to download Tranco list.");
105+
}
106+
}
107+
}
108+
109+
if (!file.good())
110+
{
111+
LogMessage("ERROR", "Tranco list not found. Download it first using `SELECT update_tranco(true);`");
112+
}
113+
114+
// Parse the CSV data and insert into a table
115+
LogMessage("INFO", "Inserting Tranco list into table");
116+
117+
Connection con(db);
118+
con.Query("CREATE OR REPLACE TABLE tranco_list AS SELECT * FROM read_csv('" + temp_file + "', header=false, columns={'rank': 'INTEGER', 'domain': 'VARCHAR'})");
119+
}
120+
10121
// Function to update the Tranco list table
11122
void UpdateTrancoListFunction(DataChunk &args, ExpressionState &state, Vector &result)
12123
{

src/utils/utils.cpp

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include <iostream>
77
#include <fstream>
88
#include <iomanip>
9-
#include <fstream>
109

1110
namespace duckdb
1211
{
@@ -100,114 +99,5 @@ namespace duckdb
10099

101100
result.SetValue(0, Value("updated"));
102101
}
103-
104-
// Function to get the download code for the Tranco list
105-
std::string GetTrancoDownloadCode(char *date)
106-
{
107-
CURL *curl;
108-
CURLcode res;
109-
std::string readBuffer;
110-
111-
// Construct the URL for the daily list
112-
std::string url = "https://tranco-list.eu/daily_list?date=" + std::string(date) + "&subdomains=true";
113-
114-
LogMessage("INFO", "Get Tranco download code for date: " + std::string(date));
115-
116-
curl = curl_easy_init();
117-
if (curl)
118-
{
119-
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
120-
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // Follow redirects
121-
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
122-
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
123-
res = curl_easy_perform(curl);
124-
curl_easy_cleanup(curl);
125-
126-
if (res != CURLE_OK)
127-
{
128-
throw std::runtime_error("Failed to fetch Tranco download code.");
129-
}
130-
}
131-
132-
// Extract the download code from the URL
133-
std::regex code_regex(R"(Information on the Tranco list with ID ([A-Z0-9]+))");
134-
std::smatch code_match;
135-
if (std::regex_search(readBuffer, code_match, code_regex) && code_match.size() > 1)
136-
{
137-
LogMessage("INFO", "Tranco download code: " + code_match[1].str());
138-
return code_match[1].str();
139-
}
140-
141-
throw std::runtime_error("Failed to extract Tranco download code.");
142-
}
143-
144-
// Function to download the Tranco list and create a table
145-
void LoadTrancoList(DatabaseInstance &db, bool force)
146-
{
147-
// Get yesterday's date in YYYY-MM-DD format
148-
std::time_t now = std::time(nullptr);
149-
std::tm *yesterday = std::localtime(&now);
150-
yesterday->tm_mday -= 1; // Subtract one day
151-
std::mktime(yesterday); // Normalize the time
152-
char date[11];
153-
std::strftime(date, sizeof(date), "%Y-%m-%d", yesterday);
154-
155-
// Construct the file name
156-
std::string temp_file = "tranco_list_" + std::string(date) + ".csv";
157-
158-
// Download the file if it doesn't exist or if force is true
159-
std::ifstream file(temp_file);
160-
if (force)
161-
{
162-
// Remove the old file if it exists
163-
if (file.good())
164-
{
165-
remove(temp_file.c_str());
166-
}
167-
// Get the download code
168-
std::string download_code = GetTrancoDownloadCode(date);
169-
170-
// Construct the download URL
171-
std::string download_url = "https://tranco-list.eu/download/" + download_code + "/full";
172-
173-
LogMessage("INFO", "Download Tranco list: " + download_url);
174-
175-
// Download the CSV file to a temporary file
176-
CURL *curl;
177-
CURLcode res;
178-
FILE *file = fopen(temp_file.c_str(), "wb");
179-
if (!file)
180-
{
181-
throw std::runtime_error("Failed to create temporary file for Tranco list.");
182-
}
183-
184-
curl = curl_easy_init();
185-
if (curl)
186-
{
187-
curl_easy_setopt(curl, CURLOPT_URL, download_url.c_str());
188-
curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
189-
res = curl_easy_perform(curl);
190-
curl_easy_cleanup(curl);
191-
fclose(file);
192-
193-
if (res != CURLE_OK)
194-
{
195-
remove(temp_file.c_str()); // Clean up the temporary file
196-
throw std::runtime_error("Failed to download Tranco list.");
197-
}
198-
}
199-
}
200-
201-
if (!file.good())
202-
{
203-
LogMessage("ERROR", "Tranco list not found. Download it first using `SELECT update_tranco(true);`");
204-
}
205-
206-
// Parse the CSV data and insert into a table
207-
LogMessage("INFO", "Inserting Tranco list into table");
208-
209-
Connection con(db);
210-
con.Query("CREATE OR REPLACE TABLE tranco_list AS SELECT * FROM read_csv('" + temp_file + "', header=false, columns={'rank': 'INTEGER', 'domain': 'VARCHAR'})");
211-
}
212102
}
213103
}

0 commit comments

Comments
 (0)