Skip to content

Commit e8bc244

Browse files
authored
[Enhancement](pyudf) add helper command to show more py info (#60751)
doc: apache/doris-website#3402 Add the auxiliary commands `SHOW PYTHON VERSIONS` and `SHOW PYTHON PACKAGES IN '<VERSION>'` to display more PythonUDF-related information. ```sql Doris> show python versions; +---------+---------+---------+---------------------------------------------+--------------------------------------------------------+ | Version | EnvName | EnvType | BasePath | ExecutablePath | +---------+---------+---------+---------------------------------------------+--------------------------------------------------------+ | 3.9.18 | py39 | conda | /mnt/disk7/linzhenqi/miniconda3/envs/py39 | /mnt/disk7/linzhenqi/miniconda3/envs/py39/bin/python | | 3.8.10 | py3810 | conda | /mnt/disk7/linzhenqi/miniconda3/envs/py3810 | /mnt/disk7/linzhenqi/miniconda3/envs/py3810/bin/python | | 3.12.11 | py312 | conda | /mnt/disk7/linzhenqi/miniconda3/envs/py312 | /mnt/disk7/linzhenqi/miniconda3/envs/py312/bin/python | +---------+---------+---------+---------------------------------------------+--------------------------------------------------------+ 3 rows in set (0.02 sec) Doris> show python packages in '3.9.18'; +-----------------+-------------+ | Package | Version | +-----------------+-------------+ | pyarrow | 21.0.0 | | Bottleneck | 1.4.2 | | jieba | 0.42.1 | | six | 1.17.0 | | wheel | 0.45.1 | | python-dateutil | 2.9.0.post0 | | tzdata | 2025.3 | | setuptools | 80.9.0 | | numpy | 2.0.1 | | psutil | 7.0.0 | | pandas | 2.3.3 | | mkl_random | 1.2.8 | | pip | 25.3 | | snownlp | 0.12.3 | | pytz | 2025.2 | | mkl_fft | 1.3.11 | | mkl-service | 2.4.0 | | numexpr | 2.10.1 | +-----------------+-------------+ ```
1 parent 888a046 commit e8bc244

File tree

17 files changed

+750
-0
lines changed

17 files changed

+750
-0
lines changed

be/src/service/backend_service.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#include "runtime/routine_load/routine_load_task_executor.h"
7272
#include "runtime/stream_load/stream_load_context.h"
7373
#include "runtime/stream_load/stream_load_recorder.h"
74+
#include "udf/python/python_env.h"
7475
#include "util/arrow/row_batch.h"
7576
#include "util/defer_op.h"
7677
#include "util/runtime_profile.h"
@@ -1311,5 +1312,20 @@ void BaseBackendService::test_storage_connectivity(TTestStorageConnectivityRespo
13111312
response.__set_status(status.to_thrift());
13121313
}
13131314

1315+
// Thrift handler: overwrites `result` with the environment list produced by
// PythonVersionManager::env_infos_to_thrift().
void BaseBackendService::get_python_envs(std::vector<TPythonEnvInfo>& result) {
    const auto& version_manager = PythonVersionManager::instance();
    result = version_manager.env_infos_to_thrift();
}
1318+
1319+
void BaseBackendService::get_python_packages(std::vector<TPythonPackageInfo>& result,
1320+
const std::string& python_version) {
1321+
PythonVersion version;
1322+
auto& manager = PythonVersionManager::instance();
1323+
THROW_IF_ERROR(manager.get_version(python_version, &version));
1324+
1325+
std::vector<std::pair<std::string, std::string>> packages;
1326+
THROW_IF_ERROR(list_installed_packages(version, &packages));
1327+
result = manager.package_infos_to_thrift(packages);
1328+
}
1329+
13141330
#include "common/compile_check_end.h"
13151331
} // namespace doris

be/src/service/backend_service.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ class BaseBackendService : public BackendServiceIf {
122122
void test_storage_connectivity(TTestStorageConnectivityResponse& response,
123123
const TTestStorageConnectivityRequest& request) override;
124124

125+
void get_python_envs(std::vector<TPythonEnvInfo>& result) override;
126+
127+
void get_python_packages(std::vector<TPythonPackageInfo>& result,
128+
const std::string& python_version) override;
129+
125130
////////////////////////////////////////////////////////////////////////////
126131
// begin cloud backend functions
127132
////////////////////////////////////////////////////////////////////////////

be/src/udf/python/python_env.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,32 @@
1818
#include "python_env.h"
1919

2020
#include <fmt/core.h>
21+
#include <rapidjson/document.h>
2122

2223
#include <filesystem>
2324
#include <memory>
2425
#include <regex>
2526
#include <vector>
2627

2728
#include "common/status.h"
29+
#include "gen_cpp/BackendService_types.h"
2830
#include "udf/python/python_server.h"
2931
#include "util/string_util.h"
3032

3133
namespace doris {
3234

3335
namespace fs = std::filesystem;
3436

37+
// Maps a PythonEnvType to its lowercase label: "conda" for CONDA, "venv" for VENV,
// and "unknown" for any other value.
static std::string _python_env_type_to_string(PythonEnvType env_type) {
    if (env_type == PythonEnvType::CONDA) {
        return "conda";
    }
    if (env_type == PythonEnvType::VENV) {
        return "venv";
    }
    return "unknown";
}
46+
3547
// extract python version by executing `python --version` and extract "3.9.16" from "Python 3.9.16"
3648
// @param python_path: path to python executable, e.g. "/opt/miniconda3/envs/myenv/bin/python"
3749
// @param version: extracted python version, e.g. "3.9.16"
@@ -288,4 +300,88 @@ Status PythonVersionManager::init(PythonEnvType env_type, const fs::path& python
288300
return Status::OK();
289301
}
290302

303+
std::vector<TPythonEnvInfo> PythonVersionManager::env_infos_to_thrift() const {
304+
std::vector<TPythonEnvInfo> infos;
305+
const auto& envs = _env_scanner->get_envs();
306+
infos.reserve(envs.size());
307+
308+
const auto env_type_str = _python_env_type_to_string(_env_scanner->env_type());
309+
for (const auto& env : envs) {
310+
TPythonEnvInfo info;
311+
info.__set_env_name(env.env_name);
312+
info.__set_full_version(env.python_version.full_version);
313+
info.__set_env_type(env_type_str);
314+
info.__set_base_path(env.python_version.base_path);
315+
info.__set_executable_path(env.python_version.executable_path);
316+
infos.emplace_back(std::move(info));
317+
}
318+
319+
return infos;
320+
}
321+
322+
std::vector<TPythonPackageInfo> PythonVersionManager::package_infos_to_thrift(
323+
const std::vector<std::pair<std::string, std::string>>& packages) const {
324+
std::vector<TPythonPackageInfo> infos;
325+
infos.reserve(packages.size());
326+
for (const auto& [name, ver] : packages) {
327+
TPythonPackageInfo info;
328+
info.__set_package_name(name);
329+
info.__set_version(ver);
330+
infos.emplace_back(std::move(info));
331+
}
332+
return infos;
333+
}
334+
335+
// Lists the packages installed for a Python interpreter by running
// `"<executable_path>" -m pip list --format=json` and parsing its stdout with
// rapidjson.
//
// @param version   interpreter to query; must satisfy version.is_valid().
// @param packages  output; (name, version) pairs are appended after any entries
//                  already present. It is only modified when OK is returned.
// @return OK on success; InvalidArgument when `version` is invalid; InternalError
//         when the command cannot be started, finishes with a non-zero status, or
//         prints something other than a JSON array of objects that each have
//         string "name" and "version" members.
Status list_installed_packages(const PythonVersion& version,
                               std::vector<std::pair<std::string, std::string>>* packages) {
    DCHECK(packages != nullptr);
    if (!version.is_valid()) {
        return Status::InvalidArgument("Invalid python version: {}", version.to_string());
    }

    // stderr is redirected to /dev/null, so only stdout is captured below.
    // NOTE(review): executable_path is placed inside double quotes in a shell
    // command; a path containing `"`, `$` or a backtick would be interpreted by
    // the shell. Confirm that such paths cannot reach this function.
    const std::string cmd =
            fmt::format("\"{}\" -m pip list --format=json 2>/dev/null", version.executable_path);
    FILE* pipe = popen(cmd.c_str(), "r");
    if (pipe == nullptr) {
        return Status::InternalError("Failed to run pip list for python version: {}",
                                     version.full_version);
    }

    std::string output;
    char buf[4096];
    while (fgets(buf, sizeof(buf), pipe) != nullptr) {
        output += buf;
    }

    // NOTE(review): pclose() returns the child's wait status (or -1 on error), not
    // the plain exit code, so the number reported below is the encoded status
    // (e.g. `exit 1` is typically reported as 256).
    const int ret = pclose(pipe);
    if (ret != 0) {
        return Status::InternalError(
                "pip list failed for python version: {}, exit code: {}, output: {}",
                version.full_version, ret, output);
    }

    // Expected output: [{"name": "package_name", "version": "1.2.3"}, ...]
    rapidjson::Document doc;
    if (doc.Parse(output.data(), output.size()).HasParseError() || !doc.IsArray()) [[unlikely]] {
        return Status::InternalError("Failed to parse pip list json output for python version: {}",
                                     version.full_version);
    }

    // Collect into a local vector first so that a malformed entry part-way through
    // the array does not leave the caller with a partially filled result.
    std::vector<std::pair<std::string, std::string>> parsed;
    parsed.reserve(doc.Size());
    for (const auto& item : doc.GetArray()) {
        // FindMember() is only valid on JSON objects; reject scalars and nested
        // arrays before looking anything up.
        if (!item.IsObject()) [[unlikely]] {
            return Status::InternalError("Invalid pip list json format for python version: {}",
                                         version.full_version);
        }
        const auto name_it = item.FindMember("name");
        const auto version_it = item.FindMember("version");
        if (name_it == item.MemberEnd() || version_it == item.MemberEnd() ||
            !name_it->value.IsString() || !version_it->value.IsString()) [[unlikely]] {
            return Status::InternalError("Invalid pip list json format for python version: {}",
                                         version.full_version);
        }
        parsed.emplace_back(name_it->value.GetString(), version_it->value.GetString());
    }

    packages->reserve(packages->size() + parsed.size());
    for (auto& entry : parsed) {
        packages->emplace_back(std::move(entry));
    }

    return Status::OK();
}
386+
291387
} // namespace doris

be/src/udf/python/python_env.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
#pragma once
1919

2020
#include <filesystem>
21+
#include <utility>
2122

2223
#include "common/status.h"
24+
#include "gen_cpp/BackendService_types.h"
2325

2426
namespace doris {
2527

@@ -90,6 +92,8 @@ class PythonEnvScanner {
9092

9193
Status get_version(const std::string& runtime_version, PythonVersion* version) const;
9294

95+
// Returns the scanner's environment list (_envs) by const reference.
const std::vector<PythonEnvironment>& get_envs() const { return _envs; }
96+
9397
std::string root_path() const { return _env_root_path.string(); }
9498

9599
virtual PythonEnvType env_type() const = 0;
@@ -146,12 +150,26 @@ class PythonVersionManager {
146150
return _env_scanner->get_version(runtime_version, version);
147151
}
148152

153+
// Forwards to the underlying scanner's get_envs().
const std::vector<PythonEnvironment>& get_envs() const { return _env_scanner->get_envs(); }
154+
155+
// Forwards to the underlying scanner's env_type().
PythonEnvType env_type() const { return _env_scanner->env_type(); }
156+
149157
std::string to_string() const { return _env_scanner->to_string(); }
150158

159+
std::vector<TPythonEnvInfo> env_infos_to_thrift() const;
160+
161+
std::vector<TPythonPackageInfo> package_infos_to_thrift(
162+
const std::vector<std::pair<std::string, std::string>>& packages) const;
163+
151164
private:
152165
std::unique_ptr<PythonEnvScanner> _env_scanner;
153166
};
154167

168+
// List installed pip packages for a given Python version.
169+
// Returns pairs of (package_name, version).
170+
Status list_installed_packages(const PythonVersion& version,
171+
std::vector<std::pair<std::string, std::string>>* packages);
172+
155173
} // namespace doris
156174

157175
namespace std {

be/test/udf/python/python_env_test.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <gtest/gtest.h>
2121

22+
#include <csignal>
2223
#include <filesystem>
2324
#include <fstream>
2425
#include <string>
@@ -28,17 +29,41 @@ namespace doris {
2829

2930
namespace fs = std::filesystem;
3031

32+
// Test helper: creates <base_path>/bin, writes a bash script containing
// `script_body` to <base_path>/bin/python3, gives the owner full permissions on
// it, and returns a PythonVersion that uses the script as its executable.
static PythonVersion create_fake_python_version_for_pip_list(const std::string& base_path,
                                                             const std::string& script_body,
                                                             const std::string& full_version) {
    const std::string bin_dir = base_path + "/bin";
    fs::create_directories(bin_dir);

    const std::string interpreter = bin_dir + "/python3";
    std::ofstream script(interpreter);
    script << "#!/bin/bash\n" << script_body;
    // Close explicitly so the content is on disk before permissions are changed.
    script.close();
    fs::permissions(interpreter, fs::perms::owner_all);

    return PythonVersion(full_version, base_path, interpreter);
}
48+
3149
class PythonEnvTest : public ::testing::Test {
3250
protected:
3351
std::string test_dir_;
52+
// Some test frameworks set SIGCHLD to SIG_IGN,
53+
// which causes pclose() to get ECHILD because the kernel auto-reaps children.
54+
// We reset SIGCHLD to SIG_DFL for the duration of each test to mimic production
55+
// behaviour, and restore the original handler afterwards.
56+
sighandler_t old_sigchld_ = SIG_DFL;
3457

3558
void SetUp() override {
3659
test_dir_ = fs::temp_directory_path().string() + "/python_env_test_" +
3760
std::to_string(getpid()) + "_" + std::to_string(rand());
3861
fs::create_directories(test_dir_);
62+
old_sigchld_ = signal(SIGCHLD, SIG_DFL);
3963
}
4064

4165
void TearDown() override {
66+
signal(SIGCHLD, old_sigchld_);
4267
if (!test_dir_.empty() && fs::exists(test_dir_)) {
4368
fs::remove_all(test_dir_);
4469
}
@@ -601,4 +626,81 @@ TEST_F(PythonEnvTest, PythonVersionManagerInitCondaSuccess) {
601626
EXPECT_TRUE(status.ok()) << status.to_string();
602627
}
603628

629+
// ============================================================================
630+
// list_installed_packages tests
631+
// ============================================================================
632+
633+
// A default-constructed PythonVersion must be rejected with the
// "Invalid python version" error.
TEST_F(PythonEnvTest, ListInstalledPackagesInvalidPythonVersion) {
    std::vector<std::pair<std::string, std::string>> pkgs;
    PythonVersion default_constructed;

    Status st = list_installed_packages(default_constructed, &pkgs);
    EXPECT_FALSE(st.ok());
    const std::string msg = st.to_string();
    EXPECT_TRUE(msg.find("Invalid python version") != std::string::npos);
}
641+
642+
// The fake interpreter prints valid JSON but exits with status 1; the call must
// fail with the "pip list failed" error.
TEST_F(PythonEnvTest, ListInstalledPackagesPipListExitNonZero) {
    const std::string script =
            "echo '[{\"name\":\"numpy\",\"version\":\"1.26.0\"}]'\n"
            "exit 1\n";
    PythonVersion fake_python =
            create_fake_python_version_for_pip_list(test_dir_ + "/pip_nonzero", script, "3.9.16");

    std::vector<std::pair<std::string, std::string>> pkgs;
    Status st = list_installed_packages(fake_python, &pkgs);
    EXPECT_FALSE(st.ok());
    const std::string msg = st.to_string();
    EXPECT_TRUE(msg.find("pip list failed") != std::string::npos);
}
654+
655+
// The fake interpreter prints text that is not JSON and exits with 0; the call
// must fail with the JSON parse error.
TEST_F(PythonEnvTest, ListInstalledPackagesParseError) {
    const std::string script = "echo 'not-json'\nexit 0\n";
    PythonVersion fake_python = create_fake_python_version_for_pip_list(
            test_dir_ + "/pip_parse_error", script, "3.9.16");

    std::vector<std::pair<std::string, std::string>> pkgs;
    Status st = list_installed_packages(fake_python, &pkgs);
    EXPECT_FALSE(st.ok());
    const std::string msg = st.to_string();
    EXPECT_TRUE(msg.find("Failed to parse pip list json output") != std::string::npos);
}
665+
666+
// The fake interpreter prints a JSON object instead of an array; the call must
// fail with the same error used for unparsable output.
TEST_F(PythonEnvTest, ListInstalledPackagesJsonIsNotArray) {
    const std::string script = "echo '{\"name\":\"numpy\"}'\nexit 0\n";
    PythonVersion fake_python = create_fake_python_version_for_pip_list(
            test_dir_ + "/pip_not_array", script, "3.9.16");

    std::vector<std::pair<std::string, std::string>> pkgs;
    Status st = list_installed_packages(fake_python, &pkgs);
    EXPECT_FALSE(st.ok());
    const std::string msg = st.to_string();
    EXPECT_TRUE(msg.find("Failed to parse pip list json output") != std::string::npos);
}
676+
677+
// The fake interpreter prints an array whose only entry lacks a "version" member;
// the call must fail with the "Invalid pip list json format" error.
TEST_F(PythonEnvTest, ListInstalledPackagesInvalidJsonItemFormat) {
    const std::string script = "echo '[{\"name\":\"numpy\"}]'\nexit 0\n";
    PythonVersion fake_python = create_fake_python_version_for_pip_list(
            test_dir_ + "/pip_invalid_item", script, "3.9.16");

    std::vector<std::pair<std::string, std::string>> pkgs;
    Status st = list_installed_packages(fake_python, &pkgs);
    EXPECT_FALSE(st.ok());
    const std::string msg = st.to_string();
    EXPECT_TRUE(msg.find("Invalid pip list json format") != std::string::npos);
}
686+
687+
// The fake interpreter prints a well-formed two-entry array and exits with 0; both
// entries must be returned in the order they were printed.
TEST_F(PythonEnvTest, ListInstalledPackagesSuccess) {
    const std::string script =
            "echo "
            "'[{\"name\":\"numpy\",\"version\":\"1.26.0\"},{\"name\":\"pandas\",\"version\":\"2.2."
            "0\"}]'\n"
            "exit 0\n";
    PythonVersion fake_python =
            create_fake_python_version_for_pip_list(test_dir_ + "/pip_success", script, "3.9.16");

    std::vector<std::pair<std::string, std::string>> pkgs;
    Status st = list_installed_packages(fake_python, &pkgs);
    EXPECT_TRUE(st.ok()) << st.to_string();
    ASSERT_EQ(pkgs.size(), 2);

    const auto& first = pkgs[0];
    EXPECT_EQ(first.first, "numpy");
    EXPECT_EQ(first.second, "1.26.0");

    const auto& second = pkgs[1];
    EXPECT_EQ(second.first, "pandas");
    EXPECT_EQ(second.second, "2.2.0");
}
705+
604706
} // namespace doris

fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ PARAMETER: 'PARAMETER';
414414
PARSED: 'PARSED';
415415
PARTITION: 'PARTITION';
416416
PARTITIONS: 'PARTITIONS';
417+
PACKAGES: 'PACKAGES';
417418
PASSWORD: 'PASSWORD';
418419
PASSWORD_EXPIRE: 'PASSWORD_EXPIRE';
419420
PASSWORD_HISTORY: 'PASSWORD_HISTORY';
@@ -431,6 +432,7 @@ PLAN: 'PLAN';
431432
PLAY: 'PLAY';
432433
PRIVILEGES: 'PRIVILEGES';
433434
PROCESS: 'PROCESS';
435+
PYTHON: 'PYTHON';
434436
PLUGIN: 'PLUGIN';
435437
PLUGINS: 'PLUGINS';
436438
POLICY: 'POLICY';
@@ -603,6 +605,7 @@ VAULT: 'VAULT';
603605
VAULTS: 'VAULTS';
604606
VERBOSE: 'VERBOSE';
605607
VERSION: 'VERSION';
608+
VERSIONS: 'VERSIONS';
606609
VIEW: 'VIEW';
607610
VIEWS: 'VIEWS';
608611
WARM: 'WARM';

fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,8 @@ supportedShowStatement
475475
(FROM |IN) tableName=multipartIdentifier
476476
((FROM | IN) database=multipartIdentifier)? #showIndex
477477
| SHOW WARM UP JOB wildWhere? #showWarmUpJob
478+
| SHOW PYTHON VERSIONS #showPythonVersions
479+
| SHOW PYTHON PACKAGES IN STRING_LITERAL #showPythonPackages
478480
;
479481

480482
supportedLoadStatement
@@ -2190,6 +2192,7 @@ nonReserved
21902192
| PASSWORD_LOCK_TIME
21912193
| PASSWORD_REUSE
21922194
| PARTITIONS
2195+
| PACKAGES
21932196
| PATH
21942197
| PAUSE
21952198
| PERCENT
@@ -2209,6 +2212,7 @@ nonReserved
22092212
| PROFILE
22102213
| PROPERTIES
22112214
| PROPERTY
2215+
| PYTHON
22122216
| QUANTILE_STATE
22132217
| QUANTILE_UNION
22142218
| QUARTER
@@ -2315,6 +2319,7 @@ nonReserved
23152319
| VAULTS
23162320
| VERBOSE
23172321
| VERSION
2322+
| VERSIONS
23182323
| VIEW
23192324
| VIEWS
23202325
| WARM

0 commit comments

Comments
 (0)