diff --git a/.gitignore b/.gitignore index 0fb3108..964b4cc 100755 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,10 @@ !*/ !*.* +# 忽略任意位置的 vp_data 目录 +**/vp_data/ +key/* + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -158,3 +162,4 @@ cmake-build-debug-remote/ cmake-build-debug/ build/* !build/build.sh +.vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json index a1b30da..7275da0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,10 +8,35 @@ "name": "debug samples for videopipe", // debug for samples in videopipe "type": "cppdbg", "request": "launch", + // "program": "${workspaceFolder}/build/bin/${fileBasenameNoExtension}", "program": "${workspaceFolder}/build/bin/${fileBasenameNoExtension}", - "args": [""], + "args": ["./vp_data/test_video/face.mp4"], // change this value to the path of your vp_data "stopAtEntry": false, - "cwd": "${workspaceFolder}/..", // change this value to the path of your vp_data + // "cwd": "${workspaceFolder}/..", // change this value to the path of your vp_data + "cwd": "${workspaceFolder}/", // change this value to the path of your vp_data + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "" + }, + { + "name": "debug videopipe ex samples", // debug for ex samples in videopipe + "type": "cppdbg", + "request": "launch", + // "program": "${workspaceFolder}/build/bin/${fileBasenameNoExtension}", + "program": "${workspaceFolder}/build/bin/${fileBasenameNoExtension}", + "args": ["./vp_data/test_video/test1.mp4"], // change this value to the path of your vp_data + "stopAtEntry": false, + // "cwd": "${workspaceFolder}/..", // change this value to the path of your vp_data + "cwd": "${workspaceFolder}/", // change this value to the path of your vp_data "environment": [], 
"externalConsole": false, "MIMode": "gdb", @@ -46,6 +71,56 @@ ], "preLaunchTask": "" }, + { + "name": "debug cpp_llmlib samples", // debug for trt samples at third_party/trt_yolov8 + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/third_party/cpp_llmlib/build/${fileBasenameNoExtension}", + "args": [""], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/", // change this value to the path of your vp_data + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "" + }, + { + "name": "debug cpp_analyzerlib samples", // debug for trt samples at third_party/trt_yolov8 + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/third_party/cpp_analyzelib/build/doubao_analyzer_app", + "args": [ + "--api-key", + "9337f332-cfd4-43f2-a862-0dc320934c7c", + "--folder", + "./vp_data/test_video", + "--max-files", + "6" + + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/", // change this value to the path of your vp_data + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "" + }, { "name": "debug trt_yolov8 samples", // debug for trt samples at third_party/trt_yolov8 "type": "cppdbg", diff --git a/.vscode/settings.json b/.vscode/settings.json index 6d9d6d7..b3796b8 100755 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,6 @@ { "files.associations": { + "*.yaml": "yaml", "memory": "cpp", "cmath": "cpp", "cstddef": "cpp", @@ -88,7 +89,14 @@ "cinttypes": "cpp", "__nullptr": "cpp", "__locale": "cpp", - "regex": "cpp" + "regex": "cpp", + "csignal": "cpp", + "forward_list": "cpp", + "ranges": 
{
    // Build task that compiles the file currently open in the editor into an
    // executable placed next to it, linking against the VideoPipe libraries.
    "version": "2.0.0",
    "tasks": [
        {
            "type": "cppbuild",
            "label": "C/C++: g++ 生成活动文件",
            "command": "/usr/bin/g++",
            "args": [
                "-fdiagnostics-color=always", // colored diagnostics
                "-g", // emit debug information
                "${file}", // compile the active file
                "-o", // output selector
                "${fileDirname}/${fileBasenameNoExtension}", // executable next to the source file
                "-I${workspaceFolder}", // project headers
                "-I${workspaceFolder}/third_party/cpp_llmlib", // llmlib headers
                "-I${workspaceFolder}/nodes", // node headers
                "-I${workspaceFolder}/nodes/infers", // inference node headers
                "-I${workspaceFolder}/nodes/track", // tracking node headers
                "-I${workspaceFolder}/nodes/osd", // OSD node headers
                "-I${workspaceFolder}/utils/analysis_board", // analysis_board headers
                "-I/usr/local/cuda/include",
                "-I/usr/include/x86_64-linux-gnu",
                "-I/usr/local/include/opencv4", // OpenCV headers
                "-L${fileDirname}/../build/libs", // library search path
                // Same directory the absolute /home/... path pointed to, but relative to the
                // workspace so the task works on any checkout location.
                "-L${workspaceFolder}/third_party/cpp_llmlib/../build/libs",
                "-lvideo_pipe", // link video_pipe
                "-ltinyexpr", // link tinyexpr
                "-Wl,-rpath,${fileDirname}/../build/libs", // runtime library search path
                "-Wl,-rpath,/usr/local/cuda/lib64",
                "-lssl",
                "-lcrypto", // link OpenSSL
                // NOTE(review): these four entries only form a command substitution if the task
                // runner joins the arguments and executes them through a shell - confirm.
                "`pkg-config", "--cflags", "--libs", "opencv4`" // link OpenCV
            ],
            "options": {
                "cwd": "${fileDirname}"
            },
            "problemMatcher": [
                "$gcc"
            ],
            "group": {
                "kind": "build",
                "isDefault": true
            },
            "detail": "编译器: /usr/bin/g++"
        }
    ]
}
OFF) option(VP_WITH_PADDLE "prepared PaddlePaddle or not?" OFF) option(VP_WITH_KAFKA "prepared Kafka or not?" OFF) -option(VP_WITH_LLM "prepared LLM or not?" OFF) +option(VP_WITH_LLM "prepared LLM or not?" ON) option(VP_WITH_FFMPEG "prepared FFMPEG or not?" OFF) option(VP_BUILD_COMPLEX_SAMPLES "build complex samples or not? (maybe source code not provided)" OFF) @@ -105,6 +105,12 @@ if(VP_WITH_LLM) include_directories(${OPENSSL_INCLUDE_DIR}) list(APPEND VP_DEPEND_LIBS ${OPENSSL_LIBRARIES}) add_definitions(-DVP_WITH_LLM) + # # + # # 其他项目的 CMakeLists.txt + # add_subdirectory(third_party/cpp_analyzelib) + + # add_executable(mllm_analyse_sample_openai_video_ex mllm_analyse_sample_openai_video_ex.cpp) + # target_link_libraries(mllm_analyse_sample_openai_video_ex doubao::doubao_analyzer) endif() # optional for FFmpeg diff --git a/Extractor/CMakeLists.txt b/Extractor/CMakeLists.txt new file mode 100644 index 0000000..39e8c64 --- /dev/null +++ b/Extractor/CMakeLists.txt @@ -0,0 +1,8 @@ +cmake_minimum_required(VERSION 3.10) +project(VideoFrameExtractor) + +find_package(OpenCV REQUIRED) + +add_executable(video_extractor video_extractor.cpp) +target_link_libraries(video_extractor ${OpenCV_LIBS}) +target_compile_features(video_extractor PRIVATE cxx_std_11) \ No newline at end of file diff --git a/Extractor/README.md b/Extractor/README.md new file mode 100644 index 0000000..29df5e5 --- /dev/null +++ b/Extractor/README.md @@ -0,0 +1,83 @@ +编译说明 +1. 安装OpenCV +首先需要安装OpenCV库: + +Ubuntu/Debian: + +bash +sudo apt-get update +sudo apt-get install libopencv-dev +Windows (使用vcpkg): + +bash +vcpkg install opencv +macOS (使用Homebrew): + +bash +brew install opencv +2. 
编译命令 +使用g++编译: + +bash +g++ -std=c++11 video_extractor.cpp -o video_extractor `pkg-config --cflags --libs opencv4` +使用CMake (推荐): + +创建 CMakeLists.txt 文件: + +cmake +cmake_minimum_required(VERSION 3.10) +project(VideoFrameExtractor) + +find_package(OpenCV REQUIRED) + +add_executable(video_extractor video_extractor.cpp) +target_link_libraries(video_extractor ${OpenCV_LIBS}) +target_compile_features(video_extractor PRIVATE cxx_std_11) +编译: + +bash +mkdir build +cd build +cmake .. +make +使用方法 +1. 提取所有帧 +bash +./video_extractor input_video.mp4 ./output_frames +2. 按帧间隔提取 +bash +# 每10帧提取一帧 +./video_extractor input_video.mp4 ./output_frames -interval 10 +3. 按时间间隔提取 +bash +# 每2.5秒提取一帧 +./video_extractor input_video.mp4 ./output_frames -time 2.5 +代码特点 +命名规则: 严格按照 0, 1, 2, 3... 的序列命名JPEG文件 + +格式支持: 输出标准的JPEG格式图片 + +灵活提取: 支持全帧提取、按帧间隔提取、按时间间隔提取 + +错误处理: 完善的错误检测和处理机制 + +信息显示: 显示视频信息和提取进度 + +输出示例 +运行程序后,会在指定目录生成类似以下的文件: + +text +output_frames/ +├── 0.jpeg +├── 1.jpeg +├── 2.jpeg +├── 3.jpeg +└── ... +这个代码提供了完整的视频帧提取功能,可以根据您的具体需求进行调整和扩展。 + +whj00@navyai:~/VideoPipe/ExtractorVideo$ g++ -std=c++17 video_extractor.cpp -o video_extractor `pkg-config --cflags --libs opencv4` +video_extractor.cpp: In member function ‘bool VideoFrameExtractor::initialize(const string&, const string&)’: +video_extractor.cpp:33:30: error: ‘create_directories’ is not a member of ‘std::filesystem’ + 33 | std::filesystem::create_directories(outputDir); +中文分析处理 +这个错误是因为编译器没有找到 std::filesystem 库。我来为您提供修复后的代码,包含多种解决方案。 \ No newline at end of file diff --git a/Extractor/video_extractor.cpp b/Extractor/video_extractor.cpp new file mode 100644 index 0000000..03ddd9c --- /dev/null +++ b/Extractor/video_extractor.cpp @@ -0,0 +1,297 @@ +#include +#include +#include +#include +#include +#include // 用于mkdir +#include // 用于错误处理 + +class VideoFrameExtractor { +private: + cv::VideoCapture videoCapture; + std::string outputDir; + int frameCount; + +public: + VideoFrameExtractor() : frameCount(0) {} + + // 创建目录的辅助函数 + bool 
createDirectory(const std::string& path) { + // 方法1: 使用系统调用 + int result = system(("mkdir -p \"" + path + "\"").c_str()); + if (result == 0) { + std::cout << "创建目录: " << path << std::endl; + return true; + } + + // 方法2: 使用mkdir (更安全的方式) + std::string command = "mkdir -p \"" + path + "\""; + if (system(command.c_str()) == 0) { + return true; + } + + std::cerr << "警告: 无法创建目录: " << path << std::endl; + std::cerr << "请手动创建目录或检查权限" << std::endl; + return false; + } + + // 检查目录是否存在 + bool directoryExists(const std::string& path) { + struct stat info; + if (stat(path.c_str(), &info) != 0) { + return false; // 无法访问 + } + return (info.st_mode & S_IFDIR) != 0; // 是目录 + } + + // 初始化视频文件 + bool initialize(const std::string& videoPath, const std::string& outputDirectory) { + // 打开视频文件 + videoCapture.open(videoPath); + if (!videoCapture.isOpened()) { + std::cerr << "错误: 无法打开视频文件: " << videoPath << std::endl; + return false; + } + + // 设置输出目录 + outputDir = outputDirectory; + if (outputDir.back() != '/') { + outputDir += '/'; + } + + // 创建输出目录 + if (!directoryExists(outputDir)) { + std::cout << "目录不存在,尝试创建: " << outputDir << std::endl; + if (!createDirectory(outputDir)) { + std::cerr << "错误: 无法创建输出目录" << std::endl; + return false; + } + } else { + std::cout << "使用现有目录: " << outputDir << std::endl; + } + + // 显示视频信息 + double fps = videoCapture.get(cv::CAP_PROP_FPS); + double totalFrames = videoCapture.get(cv::CAP_PROP_FRAME_COUNT); + double width = videoCapture.get(cv::CAP_PROP_FRAME_WIDTH); + double height = videoCapture.get(cv::CAP_PROP_FRAME_HEIGHT); + double duration = totalFrames / fps; + + std::cout << "=== 视频信息 ===" << std::endl; + std::cout << " - 文件路径: " << videoPath << std::endl; + std::cout << " - 帧率: " << fps << " fps" << std::endl; + std::cout << " - 总帧数: " << totalFrames << " 帧" << std::endl; + std::cout << " - 分辨率: " << width << " x " << height << std::endl; + std::cout << " - 时长: " << std::fixed << std::setprecision(2) << duration << " 秒" << std::endl; + std::cout 
<< " - 输出目录: " << outputDir << std::endl; + std::cout << "=================" << std::endl; + + return true; + } + + // 生成文件名(0,1,2,3命名规则) + std::string generateFilename(int frameNumber) { + std::stringstream filename; + filename << outputDir << frameNumber << ".jpg"; + return filename.str(); + } + + // 提取所有帧 + bool extractAllFrames() { + cv::Mat frame; + frameCount = 0; + + std::cout << "开始提取所有帧..." << std::endl; + + // 获取总帧数用于进度显示 + double totalFrames = videoCapture.get(cv::CAP_PROP_FRAME_COUNT); + + while (true) { + // 读取下一帧 + videoCapture >> frame; + + // 如果帧为空,说明视频结束 + if (frame.empty()) { + break; + } + + // 生成文件名并保存 + std::string filename = generateFilename(frameCount); + std::vector compression_params; + compression_params.push_back(cv::IMWRITE_JPEG_QUALITY); + compression_params.push_back(95); // JPEG质量参数 (0-100) + + bool success = cv::imwrite(filename, frame, compression_params); + + if (success) { + // 显示进度 + double progress = (frameCount + 1) / totalFrames * 100; + std::cout << "\r进度: " << std::fixed << std::setprecision(1) << progress + << "% - 已提取 " << frameCount + 1 << " 帧" << std::flush; + } else { + std::cerr << "\n错误: 无法保存帧: " << filename << std::endl; + return false; + } + + frameCount++; + } + + std::cout << "\n提取完成! 共提取 " << frameCount << " 帧" << std::endl; + return true; + } + + // 按间隔提取帧 + bool extractFramesByInterval(int interval) { + cv::Mat frame; + frameCount = 0; + int currentFrame = 0; + + if (interval <= 0) { + std::cerr << "错误: 间隔必须大于0" << std::endl; + return false; + } + + std::cout << "按间隔 " << interval << " 帧提取..." 
<< std::endl; + + double totalFrames = videoCapture.get(cv::CAP_PROP_FRAME_COUNT); + + while (true) { + // 读取下一帧 + videoCapture >> frame; + + if (frame.empty()) { + break; + } + + // 如果当前帧符合间隔要求 + if (currentFrame % interval == 0) { + std::string filename = generateFilename(frameCount); + std::vector compression_params; + compression_params.push_back(cv::IMWRITE_JPEG_QUALITY); + compression_params.push_back(95); + + bool success = cv::imwrite(filename, frame, compression_params); + + if (success) { + double progress = (currentFrame + 1) / totalFrames * 100; + std::cout << "\r进度: " << std::fixed << std::setprecision(1) << progress + << "% - 已提取 " << frameCount + 1 << " 帧" << std::flush; + } else { + std::cerr << "\n错误: 无法保存帧: " << filename << std::endl; + return false; + } + + frameCount++; + } + + currentFrame++; + } + + std::cout << "\n提取完成! 共提取 " << frameCount << " 帧" << std::endl; + return true; + } + + // 按时间间隔提取帧 + bool extractFramesByTimeInterval(double timeInterval) { + double fps = videoCapture.get(cv::CAP_PROP_FPS); + int frameInterval = static_cast(fps * timeInterval); + + if (frameInterval < 1) frameInterval = 1; + + std::cout << "按时间间隔 " << timeInterval << " 秒提取 (约 " << frameInterval << " 帧)" << std::endl; + + return extractFramesByInterval(frameInterval); + } + + // 获取提取的帧数 + int getExtractedFrameCount() const { + return frameCount; + } + + ~VideoFrameExtractor() { + if (videoCapture.isOpened()) { + videoCapture.release(); + std::cout << "视频资源已释放" << std::endl; + } + } +}; + +// 显示帮助信息 +void showHelp(const std::string& programName) { + std::cout << "视频帧提取工具" << std::endl; + std::cout << "用法: " << programName << " <视频文件路径> <输出目录> [选项]" << std::endl; + std::cout << std::endl; + std::cout << "选项:" << std::endl; + std::cout << " -all 提取所有帧 (默认)" << std::endl; + std::cout << " -interval N 每N帧提取一帧" << std::endl; + std::cout << " -time T 每T秒提取一帧" << std::endl; + std::cout << " -help 显示此帮助信息" << std::endl; + std::cout << std::endl; + std::cout << "示例:" << 
std::endl; + std::cout << " " << programName << " input.mp4 ./output" << std::endl; + std::cout << " " << programName << " input.mp4 ./output -interval 10" << std::endl; + std::cout << " " << programName << " input.mp4 ./output -time 2.5" << std::endl; + std::cout << " " << programName << " /path/to/video.mp4 /path/to/output -all" << std::endl; +} + +// 使用示例 +int main(int argc, char* argv[]) { + // 检查参数 + if (argc < 3) { + showHelp(argv[0]); + return 1; + } + + std::string videoPath = argv[1]; + std::string outputDir = argv[2]; + + // 检查帮助参数 + if (std::string(argv[1]) == "-help" || std::string(argv[1]) == "--help") { + showHelp(argv[0]); + return 0; + } + + VideoFrameExtractor extractor; + + // 初始化视频 + if (!extractor.initialize(videoPath, outputDir)) { + return 1; + } + + // 根据参数选择提取模式 + if (argc == 3) { + // 默认提取所有帧 + return extractor.extractAllFrames() ? 0 : 1; + } else if (argc >= 4) { + std::string option = argv[3]; + + if (option == "-interval" && argc == 5) { + try { + int interval = std::stoi(argv[4]); + return extractor.extractFramesByInterval(interval) ? 0 : 1; + } catch (const std::exception& e) { + std::cerr << "错误: 无效的间隔参数" << std::endl; + return 1; + } + } else if (option == "-time" && argc == 5) { + try { + double timeInterval = std::stod(argv[4]); + return extractor.extractFramesByTimeInterval(timeInterval) ? 0 : 1; + } catch (const std::exception& e) { + std::cerr << "错误: 无效的时间参数" << std::endl; + return 1; + } + } else if (option == "-all") { + return extractor.extractAllFrames() ? 
0 : 1; + } else if (option == "-help") { + showHelp(argv[0]); + return 0; + } else { + std::cerr << "错误: 无效的参数" << std::endl; + showHelp(argv[0]); + return 1; + } + } + + return 0; +} \ No newline at end of file diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 76ed855..ab427d4 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -109,6 +109,31 @@ if(VP_WITH_LLM) add_executable(mllm_analyse_sample_openai "mllm_analyse_sample_openai.cpp") target_link_libraries(mllm_analyse_sample_openai ${PROJECT_NAME}) + + add_executable(mllm_analyse_sample_openai_video "mllm_analyse_sample_openai_video.cpp") + target_link_libraries(mllm_analyse_sample_openai_video ${PROJECT_NAME}) + + # 确保只添加一次 + if(NOT TARGET doubao::doubao_analyzer) + message(STATUS "Adding cpp_analyzelib subdirectory...") + add_subdirectory(../third_party/cpp_analyzelib ${CMAKE_BINARY_DIR}/doubao_analyzer) + else() + message(STATUS "cpp_analyzelib target already exists") + endif() + + # 检查目标属性 + if(TARGET doubao::doubao_analyzer) + get_target_property(DOUBAO_SOURCES doubao::doubao_analyzer SOURCES) + get_target_property(DOUBAO_LINK_LIBS doubao::doubao_analyzer LINK_LIBRARIES) + message(STATUS "Doubao analyzer sources: ${DOUBAO_SOURCES}") + message(STATUS "Doubao analyzer link libraries: ${DOUBAO_LINK_LIBS}") + else() + message(STATUS "doubao::doubao_analyzer target not found!") + endif() + + add_executable(mllm_analyse_sample_openai_video_ex mllm_analyse_sample_openai_video_ex.cpp) + target_link_libraries(mllm_analyse_sample_openai_video_ex ${PROJECT_NAME} doubao::doubao_analyzer) + endif() # samples depend on FFmpeg diff --git a/samples/mllm_analyse_sample_openai.cpp b/samples/mllm_analyse_sample_openai.cpp index 8380554..7c12739 100644 --- a/samples/mllm_analyse_sample_openai.cpp +++ b/samples/mllm_analyse_sample_openai.cpp @@ -5,6 +5,9 @@ #include "../nodes/vp_rtmp_des_node.h" #include "../utils/analysis_board/vp_analysis_board.h" +#include "../utils/config_reader.h" + 
+#include /* * ## mllm_analyse_sample_openai ## @@ -17,6 +20,36 @@ int main() { VP_SET_LOG_LEVEL(vp_utils::vp_log_level::INFO); VP_LOGGER_INIT(); + + // 从配置文件读取大模型配置 + auto& configReader = ConfigReader::getInstance(); + std::string configPath = "./key/config.ini"; + + if (!configReader.loadConfig(configPath)) { + VP_ERROR("Failed to load config file: " + configPath); + return -1; + } + + // 读取配置参数 + std::string modelName = configReader.getValue("mllm_config", "model_name", ""); + std::string apiBase = configReader.getValue("mllm_config", "api_base", ""); + std::string apiKey = configReader.getValue("mllm_config", "api_key", ""); + + // 验证配置参数 + if (modelName.empty() || apiBase.empty() || apiKey.empty()) { + VP_ERROR("Invalid configuration parameters. Please check config.ini"); + VP_ERROR("Model Name: " + modelName); + VP_ERROR("API Base: " + apiBase); + VP_ERROR("API Key: " + (apiKey.empty() ? "EMPTY" : "***" + apiKey.substr(apiKey.length() - 4))); + return -1; + } + + VP_INFO("Loaded MLLM configuration:"); + VP_INFO(" Model: " + modelName); + VP_INFO(" API Base: " + apiBase); + VP_INFO(" API Key: ***" + apiKey.substr(apiKey.length() - 4)); + + // create nodes auto image_src_0 = std::make_shared("image_file_src_0", 0, "./vp_data/test_images/llm/understanding/%d.jpg", 2, 0.5); auto writing_prompt = "给图片打标签,要求包含:\n" @@ -24,12 +57,14 @@ int main() { "2. 给出的标签最多不超过5个\n" "3. 
输出按以下格式:\n" "通过仔细观察图片,可以为图片赋予这些标签:['标签1', '标签2', '标签3']。"; + auto mllm_analyser_0 = std::make_shared("mllm_analyser_0", // node name - "qwen-vl-max", // mllm model name (from aliyun, support image as input) + modelName, // mllm model name (from aliyun, support image as input) writing_prompt, // prompt - "https://dashscope.aliyuncs.com/compatible-mode/v1", // api base url - "sk-XXX", // api key (from aliyun) + apiBase, // api base url + apiKey, // api key (from aliyun) llmlib::LLMBackendType::OpenAI); // backend type + auto mllm_osd_0 = std::make_shared("osd_0", "./vp_data/font/NotoSansCJKsc-Medium.otf"); auto screen_des_0 = std::make_shared("screen_des_0", 0); diff --git a/samples/mllm_analyse_sample_openai_video.cpp b/samples/mllm_analyse_sample_openai_video.cpp new file mode 100644 index 0000000..e13a111 --- /dev/null +++ b/samples/mllm_analyse_sample_openai_video.cpp @@ -0,0 +1,158 @@ +#include "../nodes/vp_file_src_node.h" +#include "../nodes/infers/vp_mllm_analyser_node.h" +#include "../nodes/osd/vp_mllm_osd_node.h" +#include "../nodes/vp_screen_des_node.h" +#include "../nodes/vp_file_des_node.h" + +#include "../utils/analysis_board/vp_analysis_board.h" +#include "../utils/config_reader.h" + +#include +#include + +/* +* ## video_mllm_analyse_sample ## +* Video analyse based on Multimodal Large Language Model. +* Read MP4 video file and analyse key frames using MLLM to generate classification labels. 
+*/ +int main(int argc, char* argv[]) { + VP_SET_LOG_INCLUDE_CODE_LOCATION(false); + VP_SET_LOG_INCLUDE_THREAD_ID(false); + VP_SET_LOG_LEVEL(vp_utils::vp_log_level::INFO); + VP_LOGGER_INIT(); + + // 检查命令行参数 + if (argc < 2) { + VP_ERROR("Usage: " + std::string(argv[0]) + " "); + VP_ERROR("Please provide the path to MP4 video file."); + return -1; + } + + std::string videoPath = argv[1]; + + // 检查文件是否存在 + if (!std::filesystem::exists(videoPath)) { + VP_ERROR("Video file does not exist: " + videoPath); + return -1; + } + + // 检查文件扩展名 + if (videoPath.substr(videoPath.find_last_of(".") + 1) != "mp4") { + VP_WARN("File extension is not .mp4, but will try to process anyway: " + videoPath); + } + + // 从配置文件读取大模型配置 + auto& configReader = ConfigReader::getInstance(); + std::string configPath = "./key/config.ini"; + + if (!configReader.loadConfig(configPath)) { + VP_ERROR("Failed to load config file: " + configPath); + return -1; + } + + // 读取配置参数 + std::string modelName = configReader.getValue("mllm_config", "model_name", ""); + std::string apiBase = configReader.getValue("mllm_config", "api_base", ""); + std::string apiKey = configReader.getValue("mllm_config", "api_key", ""); + + // 验证配置参数 + if (modelName.empty() || apiBase.empty() || apiKey.empty()) { + VP_ERROR("Invalid configuration parameters. Please check config.ini"); + VP_ERROR("Model Name: " + modelName); + VP_ERROR("API Base: " + apiBase); + VP_ERROR("API Key: " + (apiKey.empty() ? 
"EMPTY" : "***" + apiKey.substr(apiKey.length() - 4))); + return -1; + } + + VP_INFO("Loaded MLLM configuration:"); + VP_INFO(" Model: " + modelName); + VP_INFO(" API Base: " + apiBase); + VP_INFO(" API Key: ***" + apiKey.substr(apiKey.length() - 4)); + VP_INFO("Processing video: " + videoPath); + + //预处理 查询视频文件帧和时长信息 + try { + cv::VideoCapture cap(videoPath); + if (!cap.isOpened()) { + throw std::runtime_error("无法打开视频文件"); + } + + int total_frames = static_cast(cap.get(cv::CAP_PROP_FRAME_COUNT)); + double fps = cap.get(cv::CAP_PROP_FPS); + double duration = (fps > 0) ? total_frames / fps : 0; + + std::cout << "📹 视频信息: " << total_frames << "帧, " + << fps << "FPS, " << duration << "秒" << std::endl; + + // // 计算提取帧的位置 + // std::vector frame_positions; + // if (total_frames <= num_frames) { + // for (int i = 0; i < total_frames; ++i) { + // frame_positions.push_back(i); + // } + // } else { + // int step = total_frames / num_frames; + // for (int i = 0; i < num_frames; ++i) { + // frame_positions.push_back(i * step); + // } + // frame_positions.push_back(total_frames - 1); // 确保包含最后一帧 + // } + + } catch (const std::exception& e) { + std::cerr << "❌ 错误: " << e.what() << std::endl; + return -1; + } + + + // 创建节点 + // 使用文件源节点读取MP4视频,设置帧率控制以避免处理过多帧 + auto video_src_0 = std::make_shared("video_file_src_0", 0, videoPath,0.5f,false,"avdec_h264",9); + + // 定义分析提示词 + auto video_analysis_prompt = "请仔细观察视频帧画面内容,为当前画面生成准确的分类标签。\n" + "要求:\n" + "1. 仔细分析画面中的主要对象、场景、活动、颜色、情绪等特征\n" + "2. 生成的标签要具体且相关,最多不超过5个标签\n" + "3. 考虑画面的整体主题和关键元素\n" + "4. 输出格式严格按照:当前画面标签:['标签1', '标签2', '标签3']\n" + "5. 
如果画面模糊或无法识别,返回:['无法识别']"; + + auto mllm_analyser_0 = std::make_shared("mllm_analyser_0", // 节点名称 + modelName, // MLLM模型名称 + video_analysis_prompt, // 分析提示词 + apiBase, // API基础URL + apiKey, // API密钥 + llmlib::LLMBackendType::OpenAI); // 后端类型 + + auto mllm_osd_0 = std::make_shared("mllm_osd_0", "./vp_data/font/NotoSansCJKsc-Medium.otf"); + + // 屏幕显示节点 - 实时显示分析结果 + auto screen_des_0 = std::make_shared("screen_des_0", 0); + + // 文件输出节点 - 可选,保存处理后的视频 + // auto file_des_0 = std::make_shared("file_des_0", "output_video_with_labels.mp4"); + + // 构建处理管道 + mllm_analyser_0->attach_to({video_src_0}); + mllm_osd_0->attach_to({mllm_analyser_0}); + screen_des_0->attach_to({mllm_osd_0}); + // file_des_0->attach_to({mllm_osd_0}); // 取消注释以保存输出视频 + + VP_INFO("Starting video analysis pipeline..."); + video_src_0->start(); + + // 调试面板 + vp_utils::vp_analysis_board board({video_src_0}); + board.display(1, false); + + // 等待处理完成或用户中断 + VP_INFO("Video analysis started. Press Enter to stop..."); + std::string wait; + std::getline(std::cin, wait); + + VP_INFO("Stopping pipeline..."); + video_src_0->detach_recursively(); + VP_INFO("Video analysis completed."); + + return 0; +} diff --git a/samples/mllm_analyse_sample_openai_video_ex.cpp b/samples/mllm_analyse_sample_openai_video_ex.cpp new file mode 100644 index 0000000..77b0992 --- /dev/null +++ b/samples/mllm_analyse_sample_openai_video_ex.cpp @@ -0,0 +1,258 @@ +#include "../nodes/vp_file_src_node.h" +#include "../nodes/infers/vp_mllm_analyser_node.h" + +#include "../utils/analysis_board/vp_analysis_board.h" +#include "../utils/config_reader.h" + +#include "../third_party/cpp_analyzelib/include/DoubaoMediaAnalyzer.hpp" +#include "../third_party/cpp_analyzelib/include/utils.hpp" +#include "../third_party/cpp_analyzelib/include/config.hpp" + + +#include +#include +#include +#include +#include + +#include + +// 提示词函数 +std::string get_image_prompt() +{ + return R"(请仔细观察图片内容,为图片生成合适的标签。要求: +1. 仔细观察图片的各个细节 +2. 生成的标签要准确反映图片内容 +3. 标签数量不超过5个 +4. 
// Writes the command-line help text to standard output, one line per entry.
void print_usage()
{
    // The empty entry is the blank line between the option list and the examples.
    static const char *const kUsageLines[] = {
        "用法: doubao_analyzer [选项]",
        "选项:",
        " --api-key KEY 豆包API密钥 (必需)",
        " --image PATH 单张图片路径",
        " --video PATH 单个视频路径",
        " --folder PATH 媒体文件夹路径",
        " --file-type TYPE 分析的文件类型 [all|image|video] (默认: all)",
        " --prompt TEXT 自定义提示词",
        " --max-files NUM 最大分析文件数量 (默认: 5)",
        " --video-frames NUM 视频提取帧数 (默认: 5)",
        " --output PATH 结果保存路径",
        " --help 显示此帮助信息",
        "",
        "示例:",
        " doubao_analyzer --api-key YOUR_KEY --image test.jpg",
        " doubao_analyzer --api-key YOUR_KEY --video test.mp4 --video-frames 8",
        " doubao_analyzer --api-key YOUR_KEY --folder ./media --file-type all",
    };

    for (const char *usage_line : kUsageLines)
    {
        std::cout << usage_line << std::endl;
    }
}
<< std::endl; + std::cout << "⏱️ 响应时间: " << result.response_time << "秒" << std::endl; + std::cout << "📝 分析结果:" << std::endl + << result.content << std::endl; + + auto tags = utils::extract_tags(result.content); + if (!tags.empty()) + { + std::cout << "🏷️ 提取标签: "; + for (size_t i = 0; i < tags.size(); ++i) + { + if (i > 0) + std::cout << ", "; + std::cout << tags[i]; + } + std::cout << std::endl; + } + } + else + { + std::cout << "❌ " << media_type << "分析失败: " << result.error << std::endl; + } +} + +void print_statistics(const std::vector &results) +{ + int success_count = 0; + int total_count = results.size(); + int video_count = 0; + int image_count = 0; + + double total_time = 0; + double video_total_time = 0; + double image_total_time = 0; + int video_success_count = 0; + int image_success_count = 0; + + for (const auto &result : results) + { + if (result.success) + { + success_count++; + total_time += result.response_time; + } + + if (result.raw_response.contains("type")) + { + std::string type = result.raw_response["type"]; + if (type == "video") + { + video_count++; + if (result.success) + { + video_total_time += result.response_time; + video_success_count++; + } + } + else if (type == "image") + { + image_count++; + if (result.success) + { + image_total_time += result.response_time; + image_success_count++; + } + } + } + } + + std::cout << "\n📊 分析统计:" << std::endl; + std::cout << " 总文件数: " << total_count << std::endl; + std::cout << " 成功分析: " << success_count << "/" << total_count << std::endl; + std::cout << " 图片文件: " << image_count << std::endl; + std::cout << " 视频文件: " << video_count << std::endl; + + if (success_count > 0) + { + double avg_time = total_time / success_count; + std::cout << "⏱️ 平均响应时间: " << avg_time << "秒" << std::endl; + + if (image_success_count > 0) + { + double avg_image_time = image_total_time / image_success_count; + std::cout << " 图片平均时间: " << avg_image_time << "秒" << std::endl; + } + + if (video_success_count > 0) + { + double 
avg_video_time = video_total_time / video_success_count; + std::cout << " 视频平均时间: " << avg_video_time << "秒" << std::endl; + } + } +} + + +/* +* ## video_classification_direct ## +* Direct video analysis using MLLM to get overall classification labels +*/ +int main(int argc, char* argv[]) { + VP_SET_LOG_INCLUDE_CODE_LOCATION(false); + VP_SET_LOG_INCLUDE_THREAD_ID(false); + VP_SET_LOG_LEVEL(vp_utils::vp_log_level::WARN); + VP_LOGGER_INIT(); + + // 检查命令行参数 + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << "Example: " << argv[0] << " test_video.mp4" << std::endl; + return -1; + } + + std::string videoPath = argv[1]; + + // 检查文件是否存在 + if (!std::filesystem::exists(videoPath)) { + std::cerr << "Error: Video file does not exist: " << videoPath << std::endl; + return -1; + } + + // 从配置文件读取大模型配置 + auto& configReader = ConfigReader::getInstance(); + std::string configPath = "./key/config.ini"; + + if (!configReader.loadConfig(configPath)) { + std::cerr << "Error: Failed to load config file: " << configPath << std::endl; + return -1; + } + + // 读取配置参数 + std::string modelName = configReader.getValue("mllm_config", "model_name", ""); + std::string apiBase = configReader.getValue("mllm_config", "api_base", ""); + std::string apiKey = configReader.getValue("mllm_config", "api_key", ""); + + // 验证配置参数 + if (modelName.empty() || apiBase.empty() || apiKey.empty()) { + std::cerr << "Error: Invalid configuration parameters. 
Please check config.ini" << std::endl; + return -1; + } + + std::cout << "==========================================" << std::endl; + std::cout << "Direct Video Classification Analysis" << std::endl; + std::cout << "==========================================" << std::endl; + std::cout << "Video: " << videoPath << std::endl; + std::cout << "Model: " << modelName << std::endl; + std::cout << "==========================================" << std::endl; + + + // 解析命令行参数 + std::string api_key; + std::string image_path; + std::string video_path; + std::string folder_path; + std::string file_type = "all"; + std::string prompt; + std::string output_path; + int max_files = 5; + int video_frames = 5; // 默认提取5帧 + + + // 创建分析器 + api_key = apiKey; // 从配置文件中读取 + video_path = videoPath; // 从命令行参数中读取 + + DoubaoMediaAnalyzer analyzer(api_key); + + std::cout << "🚀 豆包大模型媒体分析调试工具(支持图片和视频)" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + // 测试连接 + if (!analyzer.test_connection()) + { + return 1; + } + + std::vector results; + + std::cout << "\n🎬 分析单个视频: " << video_path << std::endl; + std::string analysis_prompt = prompt.empty() ? 
get_video_prompt() : prompt; + auto result = analyzer.analyze_single_video(video_path, analysis_prompt, 2000, video_frames); + print_result(result, "视频"); + + result.raw_response["file"] = std::filesystem::path(video_path).filename().string(); + result.raw_response["path"] = video_path; + result.raw_response["type"] = "video"; + results.push_back(result); + + // 统计信息 + if (!results.empty()) + { + print_statistics(results); + } + + return 1; +} diff --git a/samples/vp_analysis_board.png b/samples/vp_analysis_board.png new file mode 100644 index 0000000..6548b1b Binary files /dev/null and b/samples/vp_analysis_board.png differ diff --git a/third_party/cpp_analyzelib/CMakeLists copy.txt b/third_party/cpp_analyzelib/CMakeLists copy.txt new file mode 100644 index 0000000..76da170 --- /dev/null +++ b/third_party/cpp_analyzelib/CMakeLists copy.txt @@ -0,0 +1,92 @@ +# third_party/cpp_analyzelib/CMakeLists.txt +cmake_minimum_required(VERSION 3.10) + +# 保护机制 +if(DEFINED DOUBAO_ANALYZER_BUILT) + message(STATUS "doubao_analyzer already built, skipping...") + return() +endif() +set(DOUBAO_ANALYZER_BUILT TRUE CACHE INTERNAL "doubao_analyzer build flag") + +project(doubao_analyzer VERSION 1.0.0 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 查找依赖 +find_package(OpenCV REQUIRED) +find_package(CURL REQUIRED) +find_package(OpenSSL REQUIRED) + +# 包含目录 +include_directories(include) + +# 库的源文件 - 确保所有必要的源文件都包含 +set(LIB_SOURCES + src/DoubaoMediaAnalyzer.cpp + src/utils.cpp + src/config.cpp + # 如果有其他源文件,确保都添加在这里 +) + +# 检查源文件是否存在 +foreach(src_file ${LIB_SOURCES}) + if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src_file}) + message(WARNING "Source file ${src_file} does not exist") + else() + message(STATUS "Found source file: ${src_file}") + endif() +endforeach() + +# 可执行文件的源文件 +set(APP_SOURCES + src/main.cpp +) + +# 创建共享库 +add_library(doubao_analyzer_lib SHARED ${LIB_SOURCES}) + +# 设置库的属性 +set_target_properties(doubao_analyzer_lib PROPERTIES + VERSION 
${PROJECT_VERSION} + SOVERSION 1 + OUTPUT_NAME "doubao_analyzer" + EXPORT_NAME doubao_analyzer +) + +# 设置库的包含目录 +target_include_directories(doubao_analyzer_lib PUBLIC + $ + $ + ${OpenSSL_INCLUDE_DIR} +) + +# 链接库的依赖 +target_link_libraries(doubao_analyzer_lib + ${OpenCV_LIBS} + CURL::libcurl + OpenSSL::SSL + OpenSSL::Crypto +) + +# 添加导出宏定义 +target_compile_definitions(doubao_analyzer_lib PRIVATE DOUBAO_ANALYZER_EXPORTS) + +find_package(OpenMP REQUIRED) +if(OPENMP_FOUND) + target_link_libraries(doubao_analyzer_lib OpenMP::OpenMP_CXX) + target_compile_definitions(doubao_analyzer_lib PRIVATE WITH_OPENMP) +endif() + +# 创建可执行文件(可选) +add_executable(doubao_analyzer_app ${APP_SOURCES}) +target_link_libraries(doubao_analyzer_app doubao_analyzer_lib) + +# 设置C++标准 +target_compile_features(doubao_analyzer_lib PRIVATE cxx_std_17) +target_compile_features(doubao_analyzer_app PRIVATE cxx_std_17) + +# 创建别名以便其他项目使用 +add_library(doubao::doubao_analyzer ALIAS doubao_analyzer_lib) + +message(STATUS "doubao_analyzer_lib target created successfully") \ No newline at end of file diff --git a/third_party/cpp_analyzelib/CMakeLists-bak.txt b/third_party/cpp_analyzelib/CMakeLists-bak.txt new file mode 100644 index 0000000..109e800 --- /dev/null +++ b/third_party/cpp_analyzelib/CMakeLists-bak.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.10) +project(doubao_analyzer VERSION 1.0.0 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 查找依赖 +find_package(OpenCV REQUIRED) +find_package(CURL REQUIRED) +find_package(PkgConfig REQUIRED) + +# 包含目录 +include_directories(include) + +# 源文件 +set(SOURCES + src/main.cpp + src/DoubaoMediaAnalyzer.cpp + src/utils.cpp + src/config.cpp +) + +# 创建可执行文件 +add_executable(doubao_analyzer ${SOURCES}) + +# 链接库 +target_link_libraries(doubao_analyzer + ${OpenCV_LIBS} + CURL::libcurl +) + +# 设置C++标准 +target_compile_features(doubao_analyzer PRIVATE cxx_std_17) diff --git a/third_party/cpp_analyzelib/CMakeLists.txt 
b/third_party/cpp_analyzelib/CMakeLists.txt new file mode 100644 index 0000000..2240802 --- /dev/null +++ b/third_party/cpp_analyzelib/CMakeLists.txt @@ -0,0 +1,93 @@ +# third_party/cpp_analyzelib/CMakeLists.txt +cmake_minimum_required(VERSION 3.10) + +# 保护机制 +if(DEFINED DOUBAO_ANALYZER_BUILT) + message(STATUS "doubao_analyzer already built, skipping...") + return() +endif() +set(DOUBAO_ANALYZER_BUILT TRUE CACHE INTERNAL "doubao_analyzer build flag") + +project(doubao_analyzer VERSION 1.0.0 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# 查找依赖 +find_package(OpenCV REQUIRED) +find_package(CURL REQUIRED) +find_package(OpenSSL REQUIRED) + +# 包含目录 +include_directories(include) + +# 库的源文件 - 确保所有必要的源文件都包含 +set(LIB_SOURCES + src/DoubaoMediaAnalyzer.cpp + src/utils.cpp + src/config.cpp + src/ThreadPool.cpp + # 如果有其他源文件,确保都添加在这里 +) + +# 检查源文件是否存在 +foreach(src_file ${LIB_SOURCES}) + if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${src_file}) + message(WARNING "Source file ${src_file} does not exist") + else() + message(STATUS "Found source file: ${src_file}") + endif() +endforeach() + +# 可执行文件的源文件 +set(APP_SOURCES + src/main.cpp +) + +# 创建共享库 +add_library(doubao_analyzer_lib SHARED ${LIB_SOURCES}) + +# 设置库的属性 +set_target_properties(doubao_analyzer_lib PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION 1 + OUTPUT_NAME "doubao_analyzer" + EXPORT_NAME doubao_analyzer +) + +# 设置库的包含目录 +target_include_directories(doubao_analyzer_lib PUBLIC + $ + $ + ${OpenSSL_INCLUDE_DIR} +) + +# 链接库的依赖 +target_link_libraries(doubao_analyzer_lib + ${OpenCV_LIBS} + CURL::libcurl + OpenSSL::SSL + OpenSSL::Crypto +) + +# 添加导出宏定义 +target_compile_definitions(doubao_analyzer_lib PRIVATE DOUBAO_ANALYZER_EXPORTS) + +find_package(OpenMP REQUIRED) +if(OPENMP_FOUND) + target_link_libraries(doubao_analyzer_lib OpenMP::OpenMP_CXX) + target_compile_definitions(doubao_analyzer_lib PRIVATE WITH_OPENMP) +endif() + +# 创建可执行文件(可选) +add_executable(doubao_analyzer_app ${APP_SOURCES}) 
+target_link_libraries(doubao_analyzer_app doubao_analyzer_lib) + +# 设置C++标准 +target_compile_features(doubao_analyzer_lib PRIVATE cxx_std_17) +target_compile_features(doubao_analyzer_app PRIVATE cxx_std_17) + +# 创建别名以便其他项目使用 +add_library(doubao::doubao_analyzer ALIAS doubao_analyzer_lib) + +message(STATUS "doubao_analyzer_lib target created successfully") \ No newline at end of file diff --git a/third_party/cpp_analyzelib/README.md b/third_party/cpp_analyzelib/README.md new file mode 100644 index 0000000..30dc629 --- /dev/null +++ b/third_party/cpp_analyzelib/README.md @@ -0,0 +1,529 @@ +使用方法 +命令行模式 +BASH +# 分析单张图片 +doubao_analyzer --api-key YOUR_API_KEY --image test.jpg + +# 分析单个视频 +doubao_analyzer --api-key YOUR_API_KEY --video test.mp4 --video-frames 8 + +# 批量分析文件夹 +doubao_analyzer --api-key YOUR_API_KEY --folder ./media --file-type all --max-files 10 + +# 仅分析视频文件 +doubao_analyzer --api-key YOUR_API_KEY --folder ./videos --file-type video + +# 保存结果到文件 +doubao_analyzer --api-key YOUR_API_KEY --folder ./media --output results.json + +性能优化效果 +使用并发优化后,预计性能提升: + +关键帧提取:3-5倍加速(取决于CPU核心数) + +API调用:2-4倍加速(受API并发限制) + +总体处理时间:从约20秒减少到5-8秒 + +使用建议 +调整并发数:根据API限制调整 max_concurrent 参数 + +错误处理:添加重试机制处理API限流 + +资源管理:控制内存使用,避免同时处理过多大文件 + +这样的并发设计可以显著提高处理效率,特别是当处理大量媒体文件时 + + +🚀 豆包大模型媒体分析调试工具(支持图片和视频) +============================================================ +✅ 豆包API连接正常 + +📁 批量分析文件夹: ./vp_data/test_video (文件类型: all) +📁 找到 14 个媒体文件进行批量分析 + +============================================================ +📊 分析第 1/14 个文件: plate.mp4 +📏 文件大小: 50028557 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 4756帧, 25FPS, 190.24秒 + 提取第1/6帧 (位置: 0/4756) + 提取第2/6帧 (位置: 951/4756) + 提取第3/6帧 (位置: 1902/4756) + 提取第4/6帧 (位置: 2853/4756) + 提取第5/6帧 (位置: 3804/4756) + 提取第6/6帧 (位置: 4755/4756) +✅ 成功提取 6 个关键帧 +✅ 分析成功! 
+⏱️ 响应时间: 1.73579秒 +📝 分析结果: 通过分析图片,生成的标签为:['公路', '货车', '车辆行驶', '蓝色护栏', '道路监控'] +🏷️ 提取标签: 公路, 货车, 车辆行驶, 蓝色护栏, 道路监控 + +============================================================ +📊 分析第 2/14 个文件: face2.mp4 +📏 文件大小: 51296424 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 25471帧, 15FPS, 1698.07秒 + 提取第1/6帧 (位置: 0/25471) + 提取第2/6帧 (位置: 5094/25471) + 提取第3/6帧 (位置: 10188/25471) + 提取第4/6帧 (位置: 15282/25471) + 提取第5/6帧 (位置: 20376/25471) +✅ 成功提取 5 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.05756秒 +📝 分析结果: 通过分析图片,生成的标签为:['访谈', '沙发', '人物对话', 'YOUKU', 'kaiwind.com'] +🏷️ 提取标签: 访谈, 沙发, 人物对话, YOUKU, kaiwind.com + +============================================================ +📊 分析第 3/14 个文件: jam.mp4 +📏 文件大小: 8653357 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 1000帧, 25FPS, 40秒 + 提取第1/6帧 (位置: 0/1000) + 提取第2/6帧 (位置: 200/1000) + 提取第3/6帧 (位置: 400/1000) + 提取第4/6帧 (位置: 600/1000) + 提取第5/6帧 (位置: 800/1000) + 提取第6/6帧 (位置: 999/1000) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.36342秒 +📝 分析结果: 通过分析图片,生成的标签为:['杭瑞高速', '云南隧道', '车辆行驶', '碧鸡关隧道', '昆明至安宁'] +🏷️ 提取标签: 杭瑞高速, 云南隧道, 车辆行驶, 碧鸡关隧道, 昆明至安宁 + +============================================================ +📊 分析第 4/14 个文件: vehicle_stop.mp4 +📏 文件大小: 8396163 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 1000帧, 25FPS, 40秒 + 提取第1/6帧 (位置: 0/1000) + 提取第2/6帧 (位置: 200/1000) + 提取第3/6帧 (位置: 400/1000) + 提取第4/6帧 (位置: 600/1000) + 提取第5/6帧 (位置: 800/1000) + 提取第6/6帧 (位置: 999/1000) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.61341秒 +📝 分析结果: 通过分析图片,生成的标签为:['高速公路', '云南', '车辆', '交通监控', '上行方向'] +🏷️ 提取标签: 高速公路, 云南, 车辆, 交通监控, 上行方向 + +============================================================ +📊 分析第 5/14 个文件: mask_rcnn.mp4 +📏 文件大小: 15604837 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 384帧, 25FPS, 15.36秒 + 提取第1/6帧 (位置: 0/384) + 提取第2/6帧 (位置: 76/384) + 提取第3/6帧 (位置: 152/384) + 提取第4/6帧 (位置: 228/384) + 提取第5/6帧 (位置: 304/384) + 提取第6/6帧 (位置: 383/384) +✅ 成功提取 6 个关键帧 +✅ 分析成功! 
+⏱️ 响应时间: 1.24052秒 +📝 分析结果: 通过分析图片,生成的标签为:['城市街道', '交通路口', '车辆', '商铺', '行人'] +🏷️ 提取标签: 城市街道, 交通路口, 车辆, 商铺, 行人 + +============================================================ +📊 分析第 6/14 个文件: test1.mp4 +📏 文件大小: 660570 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 38帧, 25FPS, 1.52秒 + 提取第1/6帧 (位置: 0/38) + 提取第2/6帧 (位置: 7/38) + 提取第3/6帧 (位置: 14/38) + 提取第4/6帧 (位置: 21/38) + 提取第5/6帧 (位置: 28/38) + 提取第6/6帧 (位置: 37/38) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.07319秒 +📝 分析结果: 通过分析图片,生成的标签为:['木桩', '黄花', '小鸟', '自然', '森林'] +🏷️ 提取标签: 木桩, 黄花, 小鸟, 自然, 森林 + +============================================================ +📊 分析第 7/14 个文件: test.mp4 +📏 文件大小: 8323865 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 1231帧, 30FPS, 41.0333秒 + 提取第1/6帧 (位置: 0/1231) + 提取第2/6帧 (位置: 246/1231) + 提取第3/6帧 (位置: 492/1231) + 提取第4/6帧 (位置: 738/1231) + 提取第5/6帧 (位置: 984/1231) + 提取第6/6帧 (位置: 1230/1231) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.45827秒 +📝 分析结果: 通过分析图片,生成的标签为:['小鸟', '木桩', '黄花', '自然', '森林'] +🏷️ 提取标签: 小鸟, 木桩, 黄花, 自然, 森林 + +============================================================ +📊 分析第 8/14 个文件: enet_seg.mp4 +📏 文件大小: 12522093 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 51帧, 1FPS, 51秒 + 提取第1/6帧 (位置: 0/51) + 提取第2/6帧 (位置: 10/51) + 提取第3/6帧 (位置: 20/51) + 提取第4/6帧 (位置: 30/51) + 提取第5/6帧 (位置: 40/51) + 提取第6/6帧 (位置: 50/51) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.26243秒 +📝 分析结果: 通过分析图片,生成的标签为:['城市街道', '车辆', '建筑', '交通信号灯', '砖石路面'] +🏷️ 提取标签: 城市街道, 车辆, 建筑, 交通信号灯, 砖石路面 + +============================================================ +📊 分析第 9/14 个文件: 88.jpg +📏 文件大小: 126177 字节 +🖼️ 检测到图片文件 +🖼️ 图片尺寸: 1280x720 +✅ 分析成功! +⏱️ 响应时间: 1.31709秒 +📝 分析结果: 通过分析图片,生成的标签为:['赵本山', '访谈', '西装', 'YOUKU', '节目评价'] +🏷️ 提取标签: 赵本山, 访谈, 西装, YOUKU, 节目评价 + +============================================================ +📊 分析第 10/14 个文件: face.mp4 +📏 文件大小: 14952768 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... 
+📹 视频信息: 2986帧, 25FPS, 119.44秒 + 提取第1/6帧 (位置: 0/2986) + 提取第2/6帧 (位置: 597/2986) + 提取第3/6帧 (位置: 1194/2986) + 提取第4/6帧 (位置: 1791/2986) + 提取第5/6帧 (位置: 2388/2986) + 提取第6/6帧 (位置: 2985/2986) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.40615秒 +📝 分析结果: 通过分析图片,生成的标签为:['赵本山', '春晚', '访谈', '生病', '备稿'] +🏷️ 提取标签: 赵本山, 春晚, 访谈, 生病, 备稿 + +============================================================ +📊 分析第 11/14 个文件: ocr.mp4 +📏 文件大小: 1781995 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 133帧, 25FPS, 5.32秒 + 提取第1/6帧 (位置: 0/133) + 提取第2/6帧 (位置: 26/133) + 提取第3/6帧 (位置: 52/133) + 提取第4/6帧 (位置: 78/133) + 提取第5/6帧 (位置: 104/133) + 提取第6/6帧 (位置: 132/133) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.30091秒 +📝 分析结果: 通过分析图片,生成的标签为:['中国南方航空', '登机牌', '机票'] +🏷️ 提取标签: 中国南方航空, 登机牌, 机票 + +============================================================ +📊 分析第 12/14 个文件: vehicle_count.mp4 +📏 文件大小: 144941374 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 13791帧, 25FPS, 551.64秒 + 提取第1/6帧 (位置: 0/13791) + 提取第2/6帧 (位置: 2758/13791) + 提取第3/6帧 (位置: 5516/13791) + 提取第4/6帧 (位置: 8274/13791) + 提取第5/6帧 (位置: 11032/13791) + 提取第6/6帧 (位置: 13790/13791) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.54572秒 +📝 分析结果: 通过分析图片,生成的标签为:['高速公路', '车辆行驶', '道路景观', '交通场景', '绿化隔离带'] +🏷️ 提取标签: 高速公路, 车辆行驶, 道路景观, 交通场景, 绿化隔离带 + +============================================================ +📊 分析第 13/14 个文件: pose.mp4 +📏 文件大小: 632599 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 117帧, 25FPS, 4.68秒 + 提取第1/6帧 (位置: 0/117) + 提取第2/6帧 (位置: 23/117) + 提取第3/6帧 (位置: 46/117) + 提取第4/6帧 (位置: 69/117) + 提取第5/6帧 (位置: 92/117) + 提取第6/6帧 (位置: 116/117) +✅ 成功提取 6 个关键帧 +✅ 分析成功! +⏱️ 响应时间: 1.20218秒 +📝 分析结果: 通过分析图片,生成的标签为:['舞蹈', '室内', '男子', '蓝色上衣', '木地板'] +🏷️ 提取标签: 舞蹈, 室内, 男子, 蓝色上衣, 木地板 + +============================================================ +📊 分析第 14/14 个文件: jam2.mp4 +📏 文件大小: 10114048 字节 +🎬 检测到视频文件 +🎬 正在提取视频关键帧... +📹 视频信息: 1000帧, 25FPS, 40秒 + 提取第1/6帧 (位置: 0/1000) + 提取第2/6帧 (位置: 200/1000) + 提取第3/6帧 (位置: 400/1000) + 提取第4/6帧 (位置: 600/1000) + 提取第5/6帧 (位置: 800/1000) + 提取第6/6帧 (位置: 999/1000) +✅ 成功提取 6 个关键帧 +✅ 分析成功! 
+⏱️ 响应时间: 1.54297秒 +📝 分析结果: 通过分析图片,生成的标签为:['昆安高速', '云南昆明', '交通拥堵', '城市高架', '车辆'] +🏷️ 提取标签: 昆安高速, 云南昆明, 交通拥堵, 城市高架, 车辆 + +📊 分析统计: + 总文件数: 14 + 成功分析: 14/14 + 图片文件: 1 + 视频文件: 13 +⏱️ 平均响应时间: 1.36568秒 + 图片平均时间: 1.31709秒 + 视频平均时间: 1.36942秒 + + +------------- +🚀 豆包大模型媒体分析调试工具(支持图片和视频) +============================================================ +✅ 豆包API连接正常 +📁 找到 14 个媒体文件进行并发分析 +🎬 正在提取视频关键帧... +🎬 正在提取视频关键帧... +🎬 正在提取视频关键帧... +📹 视频信息: 4756帧, 25FPS, 190.24秒 +📹 视频信息: 1000帧, 25FPS, 40秒 +📹 视频信息: 25471帧, 15FPS, 1698.07秒 + 提取第1/6帧 (位置: 0/25471) + 提取第2/6帧 (位置: 5094/25471) + 提取第1/6帧 (位置: 0/1000) + 提取第1/6帧 (位置: 0/4756) + 提取第3/6帧 (位置: 10188/25471) + 提取第4/6帧 (位置: 15282/25471) + 提取第2/6帧 (位置: 951/4756) + 提取第5/6帧 (位置: 20376/25471) + 提取第3/6帧 (位置: 1902/4756) +✅ 成功提取 5 个关键帧 + 提取第2/6帧 (位置: 200/1000) + 提取第4/6帧 (位置: 2853/4756) + 提取第5/6帧 (位置: 3804/4756) + 提取第6/6帧 (位置: 4755/4756) +✅ 成功提取 6 个关键帧 + 提取第3/6帧 (位置: 400/1000) + 提取第4/6帧 (位置: 600/1000) + 提取第5/6帧 (位置: 800/1000) + 提取第6/6帧 (位置: 999/1000) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 1000帧, 25FPS, 40秒 + 提取第1/6帧 (位置: 0/1000) + 提取第2/6帧 (位置: 200/1000) + 提取第3/6帧 (位置: 400/1000) + 提取第4/6帧 (位置: 600/1000) + 提取第5/6帧 (位置: 800/1000) + 提取第6/6帧 (位置: 999/1000) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 384帧, 25FPS, 15.36秒 + 提取第1/6帧 (位置: 0/384) + 提取第2/6帧 (位置: 76/384) + 提取第3/6帧 (位置: 152/384) + 提取第4/6帧 (位置: 228/384) + 提取第5/6帧 (位置: 304/384) + 提取第6/6帧 (位置: 383/384) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 38帧, 25FPS, 1.52秒 + 提取第1/6帧 (位置: 0/38) + 提取第2/6帧 (位置: 7/38) + 提取第3/6帧 (位置: 14/38) + 提取第4/6帧 (位置: 21/38) + 提取第5/6帧 (位置: 28/38) + 提取第6/6帧 (位置: 37/38) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 1231帧, 30FPS, 41.0333秒 + 提取第1/6帧 (位置: 0/1231) +🎬 正在提取视频关键帧... +📹 视频信息: 51帧, 1FPS, 51秒 + 提取第2/6帧 (位置: 246/1231) + 提取第1/6帧 (位置: 0/51) + 提取第2/6帧 (位置: 10/51) + 提取第3/6帧 (位置: 492/1231) + 提取第3/6帧 (位置: 20/51) +🎬 正在提取视频关键帧... 
+📹 视频信息: 1帧, 25FPS, 0.04秒 + 提取第1/1帧 (位置: 0/1) +✅ 成功提取 1 个关键帧 + 提取第4/6帧 (位置: 738/1231) + 提取第4/6帧 (位置: 30/51) + 提取第5/6帧 (位置: 984/1231) + 提取第5/6帧 (位置: 40/51) + 提取第6/6帧 (位置: 1230/1231) +✅ 成功提取 6 个关键帧 + 提取第6/6帧 (位置: 50/51) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 2986帧, 25FPS, 119.44秒 + 提取第1/6帧 (位置: 0/2986) + 提取第2/6帧 (位置: 597/2986) + 提取第3/6帧 (位置: 1194/2986) + 提取第4/6帧 (位置: 1791/2986) + 提取第5/6帧 (位置: 2388/2986) + 提取第6/6帧 (位置: 2985/2986) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 133帧, 25FPS, 5.32秒 + 提取第1/6帧 (位置: 0/133) + 提取第2/6帧 (位置: 26/133) +🎬 正在提取视频关键帧... + 提取第3/6帧 (位置: 52/133) +📹 视频信息: 13791帧, 25FPS, 551.64秒 + 提取第1/6帧 (位置: 0/13791) + 提取第2/6帧 (位置: 2758/13791) + 提取第4/6帧 (位置: 78/133) + 提取第3/6帧 (位置: 5516/13791) + 提取第4/6帧 (位置: 8274/13791) + 提取第5/6帧 (位置: 11032/13791) + 提取第6/6帧 (位置: 13790/13791) +✅ 成功提取 6 个关键帧 + 提取第5/6帧 (位置: 104/133) + 提取第6/6帧 (位置: 132/133) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 117帧, 25FPS, 4.68秒 + 提取第1/6帧 (位置: 0/117) + 提取第2/6帧 (位置: 23/117) + 提取第3/6帧 (位置: 46/117) + 提取第4/6帧 (位置: 69/117) + 提取第5/6帧 (位置: 92/117) + 提取第6/6帧 (位置: 116/117) +✅ 成功提取 6 个关键帧 +🎬 正在提取视频关键帧... +📹 视频信息: 1000帧, 25FPS, 40秒 + 提取第1/6帧 (位置: 0/1000) + 提取第2/6帧 (位置: 200/1000) + 提取第3/6帧 (位置: 400/1000) + 提取第4/6帧 (位置: 600/1000) + 提取第5/6帧 (位置: 800/1000) + 提取第6/6帧 (位置: 999/1000) +✅ 成功提取 6 个关键帧 + +============================================================ +📊 分析第 1/14 个文件: plate.mp4 +✅ 分析成功! +⏱️ 响应时间: 2.52198秒 +📝 分析结果: 通过分析视频,生成的标签为:['公路', '货车', '车辆行驶', '道路监控', '算法授权失败', '蓝色护栏', '绿色植被', '时间戳'] +🏷️ 提取标签: 公路, 货车, 车辆行驶, 道路监控, 算法授权失败, 蓝色护栏, 绿色植被, 时间戳 + +============================================================ +📊 分析第 2/14 个文件: face2.mp4 +✅ 分析成功! +⏱️ 响应时间: 1.59203秒 +📝 分析结果: 通过分析视频,生成的标签为:['访谈', '人物对话', '沙发场景', 'YOUKU', 'kaiwind.com', '曲艺话题', '幽默言论'] +🏷️ 提取标签: 访谈, 人物对话, 沙发场景, YOUKU, kaiwind.com, 曲艺话题, 幽默言论 + +============================================================ +📊 分析第 3/14 个文件: jam.mp4 +✅ 分析成功! 
+⏱️ 响应时间: 3.11522秒 +📝 分析结果: 通过分析视频,生成的标签为:['杭瑞高速', '云南隧道', '车辆行驶', '碧鸡关隧道', '昆明至安宁', '交通监控', '夜间行车', '高速公路'] +🏷️ 提取标签: 杭瑞高速, 云南隧道, 车辆行驶, 碧鸡关隧道, 昆明至安宁, 交通监控, 夜间行车, 高速公路 + +============================================================ +📊 分析第 4/14 个文件: vehicle_stop.mp4 +✅ 分析成功! +⏱️ 响应时间: 2.49462秒 +📝 分析结果: 通过分析视频,生成的标签为:['高速公路', '云南', '昆磨高速', '车辆行驶', '交通监控', '上行方向', '山区路段', '公路风景'] +🏷️ 提取标签: 高速公路, 云南, 昆磨高速, 车辆行驶, 交通监控, 上行方向, 山区路段, 公路风景 + +============================================================ +📊 分析第 5/14 个文件: mask_rcnn.mp4 +✅ 分析成功! +⏱️ 响应时间: 1.66359秒 +📝 分析结果: 通过分析视频,生成的标签为:['城市街道', '交通路口', '车辆行驶', '汽车', '行人', '商铺', '斑马线', '日常街景'] +🏷️ 提取标签: 城市街道, 交通路口, 车辆行驶, 汽车, 行人, 商铺, 斑马线, 日常街景 + +============================================================ +📊 分析第 6/14 个文件: test1.mp4 +✅ 分析成功! +⏱️ 响应时间: 1.4529秒 +📝 分析结果: 通过分析视频,生成的标签为:['自然风景', '黄色小花', '枯木桩', '小鸟', '森林背景', '野生动物', '春季景象', '熊猫办公'] +🏷️ 提取标签: 自然风景, 黄色小花, 枯木桩, 小鸟, 森林背景, 野生动物, 春季景象, 熊猫办公 + +============================================================ +📊 分析第 7/14 个文件: test.mp4 +✅ 分析成功! +⏱️ 响应时间: 2.29719秒 +📝 分析结果: 通过分析视频,生成的标签为:['小鸟', '木柱', '黄花', '自然', '森林', '鸟类', '栖息', '觅食'] +🏷️ 提取标签: 小鸟, 木柱, 黄花, 自然, 森林, 鸟类, 栖息, 觅食 + +============================================================ +📊 分析第 8/14 个文件: enet_seg.mp4 +✅ 分析成功! +⏱️ 响应时间: 2.34346秒 +📝 分析结果: 通过分析视频,生成的标签为:['城市街道', '车辆行驶', '建筑', '交通信号灯', '汽车', '鹅卵石路', '城市景观', '街景'] +🏷️ 提取标签: 城市街道, 车辆行驶, 建筑, 交通信号灯, 汽车, 鹅卵石路, 城市景观, 街景 + +============================================================ +📊 分析第 9/14 个文件: 88.jpg +✅ 分析成功! +⏱️ 响应时间: 1.15917秒 +📝 分析结果: 通过分析视频,生成的标签为:['访谈', '人物', '西装', '评价节目', '室内场景', 'YOUKU', '对话'] +🏷️ 提取标签: 访谈, 人物, 西装, 评价节目, 室内场景, YOUKU, 对话 + +============================================================ +📊 分析第 10/14 个文件: face.mp4 +✅ 分析成功! 
+⏱️ 响应时间: 1.45879秒 +📝 分析结果: 通过分析视频,生成的标签为:['赵本山', '春晚', '备战', '生病', '访谈', '春晚审查', '幕后故事', '紧张'] +🏷️ 提取标签: 赵本山, 春晚, 备战, 生病, 访谈, 春晚审查, 幕后故事, 紧张 + +============================================================ +📊 分析第 11/14 个文件: ocr.mp4 +✅ 分析成功! +⏱️ 响应时间: 2.41205秒 +📝 分析结果: 通过分析视频,生成的标签为:['机票', '银行广告', '超市小票', '书籍', '餐饮小票', '火车票'] +🏷️ 提取标签: 机票, 银行广告, 超市小票, 书籍, 餐饮小票, 火车票 + +============================================================ +📊 分析第 12/14 个文件: vehicle_count.mp4 +✅ 分析成功! +⏱️ 响应时间: 2.05704秒 +📝 分析结果: 通过分析视频,生成的标签为:['高速公路', '车辆行驶', '交通场景', '公路', '车流', '绿化', '道路', '汉宜高速'] +🏷️ 提取标签: 高速公路, 车辆行驶, 交通场景, 公路, 车流, 绿化, 道路, 汉宜高速 + +============================================================ +📊 分析第 13/14 个文件: pose.mp4 +✅ 分析成功! +⏱️ 响应时间: 1.45023秒 +📝 分析结果: 通过分析视频,生成的标签为:['舞蹈', '室内', '地板动作', '男子', '蓝色上衣', '砖墙背景', '现代舞', '街舞'] +🏷️ 提取标签: 舞蹈, 室内, 地板动作, 男子, 蓝色上衣, 砖墙背景, 现代舞, 街舞 + +============================================================ +📊 分析第 14/14 个文件: jam2.mp4 +✅ 分析成功! +⏱️ 响应时间: 1.79763秒 +📝 分析结果: 通过分析视频,生成的标签为:['昆安高速', '云南昆明', '交通拥堵', '城市高架', '车辆行驶', '安宁至昆明', '高速公路', '城市景观'] +🏷️ 提取标签: 昆安高速, 云南昆明, 交通拥堵, 城市高架, 车辆行驶, 安宁至昆明, 高速公路, 城市景观 + +📊 并发分析统计: + 总文件数: 14 + 成功分析: 14/14 + 图片文件: 1 + 视频文件: 13 +⏱️ 总耗时: 10.3853秒 + 平均响应时间: 1.98685秒 + 并发加速比: 2.67838倍 \ No newline at end of file diff --git a/third_party/cpp_analyzelib/cmake/doubao_analyzerConfig.cmake.in b/third_party/cpp_analyzelib/cmake/doubao_analyzerConfig.cmake.in new file mode 100644 index 0000000..8da59be --- /dev/null +++ b/third_party/cpp_analyzelib/cmake/doubao_analyzerConfig.cmake.in @@ -0,0 +1,13 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +# 查找依赖 +find_dependency(OpenCV REQUIRED) +find_dependency(CURL REQUIRED) + +# 包含目标文件 +include("${CMAKE_CURRENT_LIST_DIR}/doubao_analyzerTargets.cmake") + +# 检查目标是否存在 +check_required_components(doubao_analyzer) \ No newline at end of file diff --git a/third_party/cpp_analyzelib/include/DoubaoMediaAnalyzer.hpp b/third_party/cpp_analyzelib/include/DoubaoMediaAnalyzer.hpp new 
file mode 100644 index 0000000..c34a3ee --- /dev/null +++ b/third_party/cpp_analyzelib/include/DoubaoMediaAnalyzer.hpp @@ -0,0 +1,104 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "ThreadPool.h" // 直接包含,而不是前向声明 +#include // 包含OpenCV头文件 + +#ifdef _WIN32 + #ifdef DOUBAO_ANALYZER_EXPORTS + #define DOUBAO_API __declspec(dllexport) + #else + #define DOUBAO_API __declspec(dllimport) + #endif +#else + #define DOUBAO_API __attribute__((visibility("default"))) +#endif + + +struct BatchAnalysisResult { + std::string filename; + bool success; + std::string result; + std::vector tags; + double processing_time; + std::string error_message; +}; + + + +struct AnalysisResult { + bool success; + std::string content; + double response_time; + nlohmann::json usage; + nlohmann::json raw_response; + std::string error; + + AnalysisResult() : success(false), response_time(0.0) {} +}; + +class DoubaoMediaAnalyzer { +private: + std::string api_key_; + std::string base_url_; + std::shared_ptr thread_pool; // 添加线程池成员变量 + +public: + explicit DoubaoMediaAnalyzer(const std::string& api_key); + + // 连接测试 + bool test_connection(); + + // 单张图片分析 + AnalysisResult analyze_single_image(const std::string& image_path, + const std::string& prompt, + int max_tokens = 1500); + + // 单个视频分析 + AnalysisResult analyze_single_video(const std::string& video_path, + const std::string& prompt, + int max_tokens = 2000, + int num_frames = 5); + + // 批量分析 + std::vector batch_analyze(const std::string& media_folder, + const std::string& prompt, + int max_files = 5, + const std::string& file_type = "all"); + + // 标签提取 + std::vector extract_tags(const std::string& content); + + // 并发批量分析 + std::vector analyze_batch_concurrent( + const std::vector& file_paths, + const std::string& prompt = "", + int max_frames = 10, + int frame_interval = 1, + int max_concurrent = 5 + ); + + // 添加缺失的方法声明 + std::vector extract_keyframes_concurrent(const std::string& video_path, + int max_frames = 10, + int 
frame_interval = 1); + + ~DoubaoMediaAnalyzer(); // 添加这行 + +private: + // 内部方法 + std::vector extract_video_frames(const std::string& video_path, int num_frames); + AnalysisResult send_analysis_request(const nlohmann::json& payload, int timeout); + AnalysisResult process_response(const std::string& response_text, double response_time); + + // HTTP请求 + std::string make_http_request(const std::string& url, + const std::string& method, + const std::string& data, + const std::vector& headers, + int timeout); +}; diff --git a/third_party/cpp_analyzelib/include/ThreadPool.h b/third_party/cpp_analyzelib/include/ThreadPool.h new file mode 100644 index 0000000..760c0ab --- /dev/null +++ b/third_party/cpp_analyzelib/include/ThreadPool.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +class ThreadPool { +public: + explicit ThreadPool(size_t threads = std::thread::hardware_concurrency()); + ~ThreadPool(); + + template + auto enqueue(F&& f, Args&&... args) + -> std::future::type>; + + size_t size() const { return workers.size(); } + +private: + std::vector workers; + std::queue> tasks; + + std::mutex queue_mutex; + std::condition_variable condition; + bool stop; +}; + +// 模板函数实现 +template +auto ThreadPool::enqueue(F&& f, Args&&... args) + -> std::future::type> { + using return_type = typename std::result_of::type; + + auto task = std::make_shared>( + std::bind(std::forward(f), std::forward(args)...) 
+ ); + + std::future res = task->get_future(); + { + std::unique_lock lock(queue_mutex); + + if(stop) + throw std::runtime_error("enqueue on stopped ThreadPool"); + + tasks.emplace([task](){ (*task)(); }); + } + condition.notify_one(); + return res; +} \ No newline at end of file diff --git a/third_party/cpp_analyzelib/include/config.hpp b/third_party/cpp_analyzelib/include/config.hpp new file mode 100644 index 0000000..7b02eb5 --- /dev/null +++ b/third_party/cpp_analyzelib/include/config.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace config { + // API配置 + extern const std::string BASE_URL; + extern const std::string MODEL_NAME; + + // 默认值 + extern const int DEFAULT_MAX_TOKENS; + extern const int DEFAULT_VIDEO_FRAMES; + extern const int DEFAULT_MAX_FILES; + extern const double DEFAULT_TEMPERATURE; + + // 超时设置(秒) + extern const int CONNECTION_TIMEOUT; + extern const int IMAGE_ANALYSIS_TIMEOUT; + extern const int VIDEO_ANALYSIS_TIMEOUT; + + // 文件扩展名 + extern const std::vector IMAGE_EXTENSIONS; + extern const std::vector VIDEO_EXTENSIONS; +} diff --git a/third_party/cpp_analyzelib/include/utils.hpp b/third_party/cpp_analyzelib/include/utils.hpp new file mode 100644 index 0000000..352d771 --- /dev/null +++ b/third_party/cpp_analyzelib/include/utils.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace config { + // API配置 + extern const std::string BASE_URL; + extern const std::string MODEL_NAME; + + // 默认值 + extern const int DEFAULT_MAX_TOKENS; + extern const int DEFAULT_VIDEO_FRAMES; + extern const int DEFAULT_MAX_FILES; + extern const double DEFAULT_TEMPERATURE; + + // 超时设置(秒) + extern const int CONNECTION_TIMEOUT; + extern const int IMAGE_ANALYSIS_TIMEOUT; + extern const int VIDEO_ANALYSIS_TIMEOUT; + + // 文件扩展名 + extern const std::vector IMAGE_EXTENSIONS; + extern const std::vector VIDEO_EXTENSIONS; +} + +namespace utils { + // 字符串工具 + std::string to_lower(const 
std::string& str); + std::vector split(const std::string& str, char delimiter); + std::string trim(const std::string& str); + std::string trim(const std::string& str, const std::string& chars_to_trim); + bool starts_with(const std::string& str, const std::string& prefix); + bool ends_with(const std::string& str, const std::string& suffix); + + // 文件工具 + bool file_exists(const std::string& path); + std::string get_file_extension(const std::string& path); + bool is_image_file(const std::string& path); + bool is_video_file(const std::string& path); + std::vector find_media_files(const std::string& folder, + const std::string& file_type = "all", + int max_files = 5); + + // Base64编码 + std::string base64_encode(const std::vector& data); + std::string base64_encode_file(const std::string& file_path); + + // 图像处理 + std::vector encode_image_to_jpeg(const cv::Mat& image, int quality = 85); + cv::Mat resize_image(const cv::Mat& image, int max_size = 800); + + // JSON工具 + nlohmann::json parse_json(const std::string& json_str); + std::string json_to_string(const nlohmann::json& j); + + // 标签提取 + std::vector extract_tags(const std::string& content); + + // 时间工具 + double get_current_time(); + void sleep_seconds(int seconds); + + // 或者更兼容的版本 + std::string get_filename(const std::string& filepath); +} diff --git a/third_party/cpp_analyzelib/key/config.ini b/third_party/cpp_analyzelib/key/config.ini new file mode 100644 index 0000000..827f111 --- /dev/null +++ b/third_party/cpp_analyzelib/key/config.ini @@ -0,0 +1,4 @@ +[mllm_config] +model_name =doubao-1.5-vision-lite-250315 +api_base =https://ark.cn-beijing.volces.com/api/v3/chat/completions +api_key = 9337f332-cfd4-43f2-a862-0dc320934c7c \ No newline at end of file diff --git a/third_party/cpp_analyzelib/src/DoubaoMediaAnalyzer.cpp b/third_party/cpp_analyzelib/src/DoubaoMediaAnalyzer.cpp new file mode 100644 index 0000000..56280ba --- /dev/null +++ b/third_party/cpp_analyzelib/src/DoubaoMediaAnalyzer.cpp @@ -0,0 +1,526 @@ 
+#include "../include/DoubaoMediaAnalyzer.hpp" +#include "../include/utils.hpp" +#include "../include/config.hpp" +#include +#include +#include + +#include + +// 并发提取关键帧函数 +std::vector DoubaoMediaAnalyzer::extract_keyframes_concurrent(const std::string& video_path, int max_frames, int frame_interval) { + cv::VideoCapture cap(video_path); + if (!cap.isOpened()) { + throw std::runtime_error("无法打开视频文件: " + video_path); + } + + int total_frames = static_cast(cap.get(cv::CAP_PROP_FRAME_COUNT)); + int fps = static_cast(cap.get(cv::CAP_PROP_FPS)); + + if (total_frames <= 0) { + throw std::runtime_error("无法获取视频帧数"); + } + + // 计算实际要提取的帧数 + int actual_frames = std::min(max_frames, total_frames); + if (frame_interval > 1) { + actual_frames = std::min(max_frames, total_frames / frame_interval); + } + + std::vector frames; + frames.reserve(actual_frames); + + // 计算帧位置 + std::vector frame_positions; + for (int i = 0; i < actual_frames; ++i) { + int pos = (total_frames - 1) * i / std::max(1, actual_frames - 1); + frame_positions.push_back(pos); + } + + // 并行提取帧 + #pragma omp parallel for schedule(dynamic) + for (int i = 0; i < actual_frames; ++i) { + cv::Mat frame; + cv::VideoCapture local_cap(video_path); + local_cap.set(cv::CAP_PROP_POS_FRAMES, frame_positions[i]); + local_cap.read(frame); + + if (!frame.empty()) { + #pragma omp critical + frames.push_back(frame.clone()); + } + local_cap.release(); + } + + cap.release(); + return frames; +} + + +// HTTP回调函数 +static size_t write_callback(void* contents, size_t size, size_t nmemb, std::string* response) { + size_t total_size = size * nmemb; + response->append(static_cast(contents), total_size); + return total_size; +} + +DoubaoMediaAnalyzer::DoubaoMediaAnalyzer(const std::string& api_key) + : api_key_(api_key), base_url_(config::BASE_URL) { + curl_global_init(CURL_GLOBAL_DEFAULT); +} + +DoubaoMediaAnalyzer::~DoubaoMediaAnalyzer() { + curl_global_cleanup(); +} + +bool DoubaoMediaAnalyzer::test_connection() { + try { + 
nlohmann::json payload = { + {"model", config::MODEL_NAME}, + {"messages", { + { + {"role", "user"}, + {"content", "请回复'连接测试成功'"} + } + }}, + {"max_tokens", 50} + }; + + auto result = send_analysis_request(payload, config::CONNECTION_TIMEOUT); + + if (result.success) { + std::cout << "✅ 豆包API连接正常" << std::endl; + return true; + } else { + std::cout << "❌ API连接失败: " << result.error << std::endl; + return false; + } + } catch (const std::exception& e) { + std::cout << "❌ 连接测试异常: " << e.what() << std::endl; + return false; + } +} + +AnalysisResult DoubaoMediaAnalyzer::analyze_single_image(const std::string& image_path, + const std::string& prompt, + int max_tokens) { + AnalysisResult result; + + try { + if (!utils::file_exists(image_path)) { + result.success = false; + result.error = "图片文件不存在: " + image_path; + return result; + } + + std::string image_data = utils::base64_encode_file(image_path); + + nlohmann::json payload = { + {"model", config::MODEL_NAME}, + {"messages", { + { + {"role", "user"}, + {"content", { + { + {"type", "image_url"}, + {"image_url", { + {"url", "data:image/jpeg;base64," + image_data} + }} + }, + { + {"type", "text"}, + {"text", prompt} + } + }} + } + }}, + {"max_tokens", max_tokens}, + {"temperature", config::DEFAULT_TEMPERATURE}, + {"stream", false} + }; + + double start_time = utils::get_current_time(); + result = send_analysis_request(payload, config::IMAGE_ANALYSIS_TIMEOUT); + result.response_time = utils::get_current_time() - start_time; + + } catch (const std::exception& e) { + result.success = false; + result.error = "分析异常: " + std::string(e.what()); + } + + return result; +} + +AnalysisResult DoubaoMediaAnalyzer::analyze_single_video(const std::string& video_path, + const std::string& prompt, + int max_tokens, + int num_frames) { + AnalysisResult result; + + try { + if (!utils::file_exists(video_path)) { + result.success = false; + result.error = "视频文件不存在: " + video_path; + return result; + } + + std::cout << "🎬 正在提取视频关键帧..." 
<< std::endl; + auto frames_base64 = extract_video_frames(video_path, num_frames); + + if (frames_base64.empty()) { + result.success = false; + result.error = "无法从视频中提取有效帧"; + return result; + } + + std::cout << "✅ 成功提取 " << frames_base64.size() << " 个关键帧" << std::endl; + + // 构建多图消息 + nlohmann::json content = nlohmann::json::array(); + content.push_back({{"type", "text"}, {"text", prompt}}); + + for (size_t i = 0; i < frames_base64.size(); ++i) { + content.push_back({ + {"type", "image_url"}, + {"image_url", { + {"url", "data:image/jpeg;base64," + frames_base64[i]}, + {"detail", "low"} + }} + }); + + content.push_back({ + {"type", "text"}, + {"text", "这是视频的第" + std::to_string(i+1) + "个关键帧"} + }); + } + + nlohmann::json payload = { + {"model", config::MODEL_NAME}, + {"messages", { + { + {"role", "user"}, + {"content", content} + } + }}, + {"max_tokens", max_tokens}, + {"temperature", config::DEFAULT_TEMPERATURE}, + {"stream", false} + }; + + double start_time = utils::get_current_time(); + result = send_analysis_request(payload, config::VIDEO_ANALYSIS_TIMEOUT); + result.response_time = utils::get_current_time() - start_time; + + } catch (const std::exception& e) { + result.success = false; + result.error = "视频分析异常: " + std::string(e.what()); + } + + return result; +} + +std::vector DoubaoMediaAnalyzer::batch_analyze(const std::string& media_folder, + const std::string& prompt, + int max_files, + const std::string& file_type) { + std::vector results; + + auto media_files = utils::find_media_files(media_folder, file_type, max_files); + + if (media_files.empty()) { + std::cout << "❌ 在 " << media_folder << " 中未找到媒体文件" << std::endl; + return results; + } + + std::cout << "📁 找到 " << media_files.size() << " 个媒体文件进行批量分析" << std::endl; + + for (size_t i = 0; i < media_files.size(); ++i) { + const auto& media_path = media_files[i]; + + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "📊 分析第 " << i+1 << "/" << media_files.size() + << " 个文件: " << 
std::filesystem::path(media_path).filename().string() << std::endl; + + try { + auto file_size = std::filesystem::file_size(media_path); + std::cout << "📏 文件大小: " << file_size << " 字节" << std::endl; + } catch (...) { + std::cout << "⚠️ 无法读取文件大小信息" << std::endl; + } + + AnalysisResult result; + bool is_video = utils::is_video_file(media_path); + + if (is_video) { + std::cout << "🎬 检测到视频文件" << std::endl; + result = analyze_single_video(media_path, prompt); + } else { + std::cout << "🖼️ 检测到图片文件" << std::endl; + + // 显示图片信息 + try { + cv::Mat img = cv::imread(media_path); + if (!img.empty()) { + std::cout << "🖼️ 图片尺寸: " << img.cols << "x" << img.rows << std::endl; + } else { + std::cout << "⚠️ 无法读取图片尺寸信息" << std::endl; + } + } catch (...) { + std::cout << "⚠️ 无法读取图片尺寸信息" << std::endl; + } + + result = analyze_single_image(media_path, prompt); + } + + if (result.success) { + std::cout << "✅ 分析成功!" << std::endl; + std::cout << "⏱️ 响应时间: " << result.response_time << "秒" << std::endl; + std::cout << "📝 分析结果: " << result.content << std::endl; + + auto tags = extract_tags(result.content); + if (!tags.empty()) { + std::cout << "🏷️ 提取标签: "; + for (size_t j = 0; j < tags.size(); ++j) { + if (j > 0) std::cout << ", "; + std::cout << tags[j]; + } + std::cout << std::endl; + } + } else { + std::cout << "❌ 分析失败: " << result.error << std::endl; + } + + // 添加文件信息 + result.raw_response["file"] = std::filesystem::path(media_path).filename().string(); + result.raw_response["path"] = media_path; + result.raw_response["type"] = is_video ? "video" : "image"; + + results.push_back(result); + + // 添加延迟避免频繁调用 + // if (i < media_files.size() - 1) { + // //std::cout << "⏳ 等待3秒后继续..." 
<< std::endl; + // //utils::sleep_seconds(3); + // } + } + + return results; +} + +std::vector DoubaoMediaAnalyzer::extract_tags(const std::string& content) { + return utils::extract_tags(content); +} + +// 私有方法实现 +std::vector DoubaoMediaAnalyzer::extract_video_frames(const std::string& video_path, int num_frames) { + std::vector frames_base64; + + try { + cv::VideoCapture cap(video_path); + if (!cap.isOpened()) { + throw std::runtime_error("无法打开视频文件"); + } + + int total_frames = static_cast(cap.get(cv::CAP_PROP_FRAME_COUNT)); + double fps = cap.get(cv::CAP_PROP_FPS); + double duration = (fps > 0) ? total_frames / fps : 0; + + std::cout << "📹 视频信息: " << total_frames << "帧, " + << fps << "FPS, " << duration << "秒" << std::endl; + + // 计算提取帧的位置 + std::vector frame_positions; + if (total_frames <= num_frames) { + for (int i = 0; i < total_frames; ++i) { + frame_positions.push_back(i); + } + } else { + int step = total_frames / num_frames; + for (int i = 0; i < num_frames; ++i) { + frame_positions.push_back(i * step); + } + frame_positions.push_back(total_frames - 1); // 确保包含最后一帧 + } + + for (size_t i = 0; i < frame_positions.size(); ++i) { + cap.set(cv::CAP_PROP_POS_FRAMES, frame_positions[i]); + cv::Mat frame; + bool ret = cap.read(frame); + + if (ret && !frame.empty()) { + // 调整帧大小以控制文件大小 + cv::Mat resized_frame = utils::resize_image(frame, 800); + + // 编码为base64 + auto jpeg_data = utils::encode_image_to_jpeg(resized_frame, 85); + std::string frame_base64 = utils::base64_encode(jpeg_data); + frames_base64.push_back(frame_base64); + + std::cout << " 提取第" << i+1 << "/" << frame_positions.size() + << "帧 (位置: " << frame_positions[i] << "/" << total_frames << ")" << std::endl; + } + } + + cap.release(); + + } catch (const std::exception& e) { + throw std::runtime_error("视频帧提取失败: " + std::string(e.what())); + } + + return frames_base64; +} + +AnalysisResult DoubaoMediaAnalyzer::send_analysis_request(const nlohmann::json& payload, int timeout) { + AnalysisResult result; + 
+ try { + std::vector headers = { + "Authorization: Bearer " + api_key_, + "Content-Type: application/json" + }; + + std::string payload_str = payload.dump(); + std::string response = make_http_request(base_url_, "POST", payload_str, headers, timeout); + + return process_response(response, 0); // response_time will be set by caller + + } catch (const std::exception& e) { + result.success = false; + result.error = "HTTP请求异常: " + std::string(e.what()); + return result; + } +} + +AnalysisResult DoubaoMediaAnalyzer::process_response(const std::string& response_text, double response_time) { + AnalysisResult result; + result.response_time = response_time; + + try { + auto json_response = nlohmann::json::parse(response_text); + + if (json_response.contains("choices") && json_response["choices"].is_array() && + !json_response["choices"].empty()) { + + auto choice = json_response["choices"][0]; + if (choice.contains("message") && choice["message"].contains("content")) { + result.success = true; + result.content = choice["message"]["content"].get(); + + if (json_response.contains("usage")) { + result.usage = json_response["usage"]; + } + + result.raw_response = json_response; + } else { + result.success = false; + result.error = "响应格式异常: 缺少content字段"; + } + } else { + result.success = false; + result.error = "响应格式异常: " + response_text; + } + + } catch (const nlohmann::json::parse_error& e) { + result.success = false; + result.error = "JSON解析失败: " + std::string(e.what()) + " - Response: " + response_text; + } catch (const std::exception& e) { + result.success = false; + result.error = "处理响应异常: " + std::string(e.what()); + } + + return result; +} + +std::string DoubaoMediaAnalyzer::make_http_request(const std::string& url, + const std::string& method, + const std::string& data, + const std::vector& headers, + int timeout) { + CURL* curl = curl_easy_init(); + if (!curl) { + throw std::runtime_error("Failed to initialize CURL"); + } + + std::string response; + + 
curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str()); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data.c_str()); + curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, data.length()); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout); + + // 设置headers + struct curl_slist* header_list = nullptr; + for (const auto& header : headers) { + header_list = curl_slist_append(header_list, header.c_str()); + } + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, header_list); + + CURLcode res = curl_easy_perform(curl); + + curl_slist_free_all(header_list); + curl_easy_cleanup(curl); + + if (res != CURLE_OK) { + throw std::runtime_error("HTTP请求失败: " + std::string(curl_easy_strerror(res))); + } + + return response; +} + +std::vector DoubaoMediaAnalyzer::analyze_batch_concurrent( + const std::vector& file_paths, + const std::string& prompt, + int max_frames, + int frame_interval, + int max_concurrent) { + + // 初始化线程池 + if (!thread_pool) { + thread_pool = std::make_shared(max_concurrent); + } + + std::vector> futures; + std::vector results; + + // 提交所有任务到线程池 + for (const auto& file_path : file_paths) { + auto future = thread_pool->enqueue([this, file_path, prompt, max_frames, frame_interval]() -> BatchAnalysisResult { + BatchAnalysisResult batch_result; + batch_result.filename = file_path; + + auto start_time = std::chrono::high_resolution_clock::now(); + + try { + AnalysisResult result = this->analyze_single_video(file_path, prompt, 2000, max_frames); + batch_result.success = result.success; + batch_result.result = result.content; // 修复:使用正确的字段名 + if (result.success) { + batch_result.tags = utils::extract_tags(result.content); // 修复:使用正确的字段名 + } else { + batch_result.error_message = result.error; + } + } catch (const std::exception& e) { + batch_result.success = false; + batch_result.error_message = e.what(); + } 
+ + auto end_time = std::chrono::high_resolution_clock::now(); + batch_result.processing_time = + std::chrono::duration(end_time - start_time).count(); + + return batch_result; + }); + + futures.push_back(std::move(future)); + } + + // 收集结果 + for (auto& future : futures) { + results.push_back(future.get()); + } + + return results; +} \ No newline at end of file diff --git a/third_party/cpp_analyzelib/src/ThreadPool.cpp b/third_party/cpp_analyzelib/src/ThreadPool.cpp new file mode 100644 index 0000000..8d5c094 --- /dev/null +++ b/third_party/cpp_analyzelib/src/ThreadPool.cpp @@ -0,0 +1,33 @@ +#include "../include/ThreadPool.h" +#include + +ThreadPool::ThreadPool(size_t threads) : stop(false) { + for(size_t i = 0; i < threads; ++i) { + workers.emplace_back([this] { + for(;;) { + std::function task; + { + std::unique_lock lock(this->queue_mutex); + this->condition.wait(lock, [this] { + return this->stop || !this->tasks.empty(); + }); + if(this->stop && this->tasks.empty()) + return; + task = std::move(this->tasks.front()); + this->tasks.pop(); + } + task(); + } + }); + } +} + +ThreadPool::~ThreadPool() { + { + std::unique_lock lock(queue_mutex); + stop = true; + } + condition.notify_all(); + for(std::thread &worker : workers) + worker.join(); +} \ No newline at end of file diff --git a/third_party/cpp_analyzelib/src/config.cpp b/third_party/cpp_analyzelib/src/config.cpp new file mode 100644 index 0000000..356ced1 --- /dev/null +++ b/third_party/cpp_analyzelib/src/config.cpp @@ -0,0 +1,28 @@ +#include "../include/config.hpp" + +namespace config +{ + // API配置 + const std::string BASE_URL = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"; + const std::string MODEL_NAME = "doubao-1.5-vision-lite-250315"; + + // 默认值 + const int DEFAULT_MAX_TOKENS = 1500; + const int DEFAULT_VIDEO_FRAMES = 5; + const int DEFAULT_MAX_FILES = 5; + const double DEFAULT_TEMPERATURE = 0.1; + + // 超时设置(秒) + const int CONNECTION_TIMEOUT = 10; + const int IMAGE_ANALYSIS_TIMEOUT = 60; + 
const int VIDEO_ANALYSIS_TIMEOUT = 120; + + // 文件扩展名 + const std::vector IMAGE_EXTENSIONS = { + ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp", + ".JPG", ".JPEG", ".PNG", ".BMP", ".TIFF", ".WEBP"}; + + const std::vector VIDEO_EXTENSIONS = { + ".mp4", ".avi", ".mov", ".mkv", ".flv", ".wmv", + ".MP4", ".AVI", ".MOV", ".MKV", ".FLV", ".WMV"}; +} diff --git a/third_party/cpp_analyzelib/src/main.cpp b/third_party/cpp_analyzelib/src/main.cpp new file mode 100644 index 0000000..70c541e --- /dev/null +++ b/third_party/cpp_analyzelib/src/main.cpp @@ -0,0 +1,534 @@ +#include "../include/DoubaoMediaAnalyzer.hpp" +#include "../include/utils.hpp" +#include "../include/config.hpp" +#include +#include +#include +#include + +// 提示词函数 +std::string get_image_prompt() +{ + return R"(请仔细观察图片内容,为图片生成合适的标签。要求: +1. 仔细观察图片的各个细节 +2. 生成的标签要准确反映图片内容 +3. 标签数量不超过5个 +4. 输出格式:通过分析图片,生成的标签为:['标签1', '标签2', '标签3'])"; +} + +std::string get_video_prompt() +{ + return R"(请仔细观察视频的关键帧内容,为视频生成合适的标签。要求: +1. 综合分析视频的整体内容和关键帧 +2. 生成的标签要准确反映视频的主题、场景、动作等 +3. 标签数量不超过8个 +4. 
输出格式:通过分析视频,生成的标签为:['标签1', '标签2', '标签3'])"; +} + +void print_usage() +{ + std::cout << "用法: doubao_analyzer [选项]" << std::endl; + std::cout << "选项:" << std::endl; + std::cout << " --api-key KEY 豆包API密钥 (必需)" << std::endl; + std::cout << " --image PATH 单张图片路径" << std::endl; + std::cout << " --video PATH 单个视频路径" << std::endl; + std::cout << " --folder PATH 媒体文件夹路径" << std::endl; + std::cout << " --file-type TYPE 分析的文件类型 [all|image|video] (默认: all)" << std::endl; + std::cout << " --prompt TEXT 自定义提示词" << std::endl; + std::cout << " --max-files NUM 最大分析文件数量 (默认: 5)" << std::endl; + std::cout << " --video-frames NUM 视频提取帧数 (默认: 5)" << std::endl; + std::cout << " --output PATH 结果保存路径" << std::endl; + std::cout << " --help 显示此帮助信息" << std::endl; + std::cout << std::endl; + std::cout << "示例:" << std::endl; + std::cout << " doubao_analyzer --api-key YOUR_KEY --image test.jpg" << std::endl; + std::cout << " doubao_analyzer --api-key YOUR_KEY --video test.mp4 --video-frames 8" << std::endl; + std::cout << " doubao_analyzer --api-key YOUR_KEY --folder ./media --file-type all" << std::endl; +} + +void print_result(const AnalysisResult &result, const std::string &media_type) +{ + if (result.success) + { + std::cout << "✅ " << media_type << "分析成功!" 
<< std::endl; + std::cout << "⏱️ 响应时间: " << result.response_time << "秒" << std::endl; + std::cout << "📝 分析结果:" << std::endl + << result.content << std::endl; + + auto tags = utils::extract_tags(result.content); + if (!tags.empty()) + { + std::cout << "🏷️ 提取标签: "; + for (size_t i = 0; i < tags.size(); ++i) + { + if (i > 0) + std::cout << ", "; + std::cout << tags[i]; + } + std::cout << std::endl; + } + } + else + { + std::cout << "❌ " << media_type << "分析失败: " << result.error << std::endl; + } +} + +void print_statistics(const std::vector &results) +{ + int success_count = 0; + int total_count = results.size(); + int video_count = 0; + int image_count = 0; + + double total_time = 0; + double video_total_time = 0; + double image_total_time = 0; + int video_success_count = 0; + int image_success_count = 0; + + for (const auto &result : results) + { + if (result.success) + { + success_count++; + total_time += result.response_time; + } + + if (result.raw_response.contains("type")) + { + std::string type = result.raw_response["type"]; + if (type == "video") + { + video_count++; + if (result.success) + { + video_total_time += result.response_time; + video_success_count++; + } + } + else if (type == "image") + { + image_count++; + if (result.success) + { + image_total_time += result.response_time; + image_success_count++; + } + } + } + } + + std::cout << "\n📊 分析统计:" << std::endl; + std::cout << " 总文件数: " << total_count << std::endl; + std::cout << " 成功分析: " << success_count << "/" << total_count << std::endl; + std::cout << " 图片文件: " << image_count << std::endl; + std::cout << " 视频文件: " << video_count << std::endl; + + if (success_count > 0) + { + double avg_time = total_time / success_count; + std::cout << "⏱️ 平均响应时间: " << avg_time << "秒" << std::endl; + + if (image_success_count > 0) + { + double avg_image_time = image_total_time / image_success_count; + std::cout << " 图片平均时间: " << avg_image_time << "秒" << std::endl; + } + + if (video_success_count > 0) + { + double 
avg_video_time = video_total_time / video_success_count; + std::cout << " 视频平均时间: " << avg_video_time << "秒" << std::endl; + } + } +} + +void interactive_mode() +{ + std::string api_key; + std::cout << "请输入豆包API密钥: "; + std::getline(std::cin, api_key); + + if (api_key.empty()) + { + std::cout << "❌ API密钥不能为空" << std::endl; + return; + } + + DoubaoMediaAnalyzer analyzer(api_key); + + if (!analyzer.test_connection()) + { + return; + } + + while (true) + { + std::cout << "\n" + << std::string(50, '=') << std::endl; + std::cout << "1. 分析单张图片" << std::endl; + std::cout << "2. 分析单个视频" << std::endl; + std::cout << "3. 批量分析文件夹" << std::endl; + std::cout << "4. 测试API连接" << std::endl; + std::cout << "5. 退出" << std::endl; + + std::string choice; + std::cout << "请选择操作 (1-5): "; + std::getline(std::cin, choice); + + if (choice == "1") + { + std::string image_path; + std::cout << "请输入图片路径(直接回车使用默认./test/test.jpg): "; + std::getline(std::cin, image_path); + if (image_path.empty()) + { + image_path = "./test/test.jpg"; + } + + if (utils::file_exists(image_path)) + { + std::string prompt; + std::cout << "请输入提示词 (直接回车使用默认): "; + std::getline(std::cin, prompt); + if (prompt.empty()) + { + prompt = get_image_prompt(); + } + + auto result = analyzer.analyze_single_image(image_path, prompt); + print_result(result, "图片"); + } + else + { + std::cout << "❌ 图片文件不存在" << std::endl; + } + } + else if (choice == "2") + { + std::string video_path; + std::cout << "请输入视频路径(直接回车使用默认./test/test.mp4): "; + std::getline(std::cin, video_path); + if (video_path.empty()) + { + video_path = "./test/test.mp4"; + } + + if (utils::file_exists(video_path)) + { + std::string prompt; + std::cout << "请输入提示词 (直接回车使用默认): "; + std::getline(std::cin, prompt); + if (prompt.empty()) + { + prompt = get_video_prompt(); + } + + std::string frames_input; + std::cout << "提取帧数 (默认5): "; + std::getline(std::cin, frames_input); + int num_frames = frames_input.empty() ? 
5 : std::stoi(frames_input); + + std::cout << "🎬 开始分析视频..." << std::endl; + auto result = analyzer.analyze_single_video(video_path, prompt, 2000, num_frames); + print_result(result, "视频"); + } + else + { + std::cout << "❌ 视频文件不存在" << std::endl; + } + } + else if (choice == "3") + { + std::string folder_path; + std::cout << "请输入媒体文件夹路径: "; + std::getline(std::cin, folder_path); + + if (utils::file_exists(folder_path)) + { + std::cout << "选择分析类型:" << std::endl; + std::cout << "1. 所有文件 (图片+视频)" << std::endl; + std::cout << "2. 仅图片" << std::endl; + std::cout << "3. 仅视频" << std::endl; + + std::string type_choice; + std::cout << "请选择 (1-3, 默认1): "; + std::getline(std::cin, type_choice); + + std::string file_type = "all"; + if (type_choice == "2") + file_type = "image"; + else if (type_choice == "3") + file_type = "video"; + + std::string max_files_input; + std::cout << "最大分析数量 (默认5): "; + std::getline(std::cin, max_files_input); + int max_files = max_files_input.empty() ? 5 : std::stoi(max_files_input); + + std::string prompt; + std::cout << "请输入提示词 (直接回车使用默认): "; + std::getline(std::cin, prompt); + if (prompt.empty()) + { + prompt = (file_type == "video") ? get_video_prompt() : get_image_prompt(); + } + + auto results = analyzer.batch_analyze(folder_path, prompt, max_files, file_type); + print_statistics(results); + } + else + { + std::cout << "❌ 文件夹不存在" << std::endl; + } + } + else if (choice == "4") + { + analyzer.test_connection(); + } + else if (choice == "5") + { + std::cout << "👋 再见!" 
<< std::endl; + break; + } + else + { + std::cout << "❌ 无效选择" << std::endl; + } + } +} + +int main(int argc, char *argv[]) +{ + // 检查命令行参数 + if (argc == 1) + { + interactive_mode(); + return 0; + } + + // 解析命令行参数 + std::string api_key; + std::string image_path; + std::string video_path; + std::string folder_path; + std::string file_type = "all"; + std::string prompt; + std::string output_path; + int max_files = 5; + int video_frames = 5; // 默认提取5帧 + + for (int i = 1; i < argc; ++i) + { + std::string arg = argv[i]; + + if (arg == "--help") + { + print_usage(); + return 0; + } + else if (arg == "--api-key" && i + 1 < argc) + { + api_key = argv[++i]; + } + else if (arg == "--image" && i + 1 < argc) + { + image_path = argv[++i]; + } + else if (arg == "--video" && i + 1 < argc) + { + video_path = argv[++i]; + } + else if (arg == "--folder" && i + 1 < argc) + { + folder_path = argv[++i]; + } + else if (arg == "--file-type" && i + 1 < argc) + { + file_type = argv[++i]; + } + else if (arg == "--prompt" && i + 1 < argc) + { + prompt = argv[++i]; + } + else if (arg == "--max-files" && i + 1 < argc) + { + max_files = std::stoi(argv[++i]); + } + else if (arg == "--video-frames" && i + 1 < argc) + { + video_frames = std::stoi(argv[++i]); + } + else if (arg == "--output" && i + 1 < argc) + { + output_path = argv[++i]; + } + } + + if (api_key.empty()) + { + std::cout << "❌ 必须提供API密钥,使用 --api-key 参数" << std::endl; + print_usage(); + return 1; + } + + // 创建分析器 + DoubaoMediaAnalyzer analyzer(api_key); + + std::cout << "🚀 豆包大模型媒体分析调试工具(支持图片和视频)" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + // 测试连接 + if (!analyzer.test_connection()) + { + return 1; + } + + std::vector results; + + // 单张图片分析 + if (!image_path.empty()) + { + std::cout << "\n📸 分析单张图片: " << image_path << std::endl; + std::string analysis_prompt = prompt.empty() ? 
get_image_prompt() : prompt; + auto result = analyzer.analyze_single_image(image_path, analysis_prompt); + print_result(result, "图片"); + + result.raw_response["file"] = std::filesystem::path(image_path).filename().string(); + result.raw_response["path"] = image_path; + result.raw_response["type"] = "image"; + results.push_back(result); + } + + // 单个视频分析 + if (!video_path.empty()) + { + std::cout << "\n🎬 分析单个视频: " << video_path << std::endl; + std::string analysis_prompt = prompt.empty() ? get_video_prompt() : prompt; + auto result = analyzer.analyze_single_video(video_path, analysis_prompt, 2000, video_frames); + print_result(result, "视频"); + + result.raw_response["file"] = std::filesystem::path(video_path).filename().string(); + result.raw_response["path"] = video_path; + result.raw_response["type"] = "video"; + results.push_back(result); + } + + // 批量媒体分析 + if (!folder_path.empty()) + { + // old + // std::cout << "\n📁 批量分析文件夹: " << folder_path << " (文件类型: " << file_type << ")" << std::endl; + // std::string analysis_prompt = prompt.empty() ? (file_type == "video" ? 
get_video_prompt() : get_image_prompt()) : prompt; + + // auto batch_results = analyzer.batch_analyze(folder_path, analysis_prompt, max_files, file_type); + // results.insert(results.end(), batch_results.begin(), batch_results.end()); + //end old + + //new 处理并发处理 + try { + // 获取文件列表 + auto files = utils::find_media_files(folder_path, "all",max_files); + std::cout << "📁 找到 " << files.size() << " 个媒体文件进行并发分析" << std::endl; + + auto start_time = std::chrono::high_resolution_clock::now(); + + // 并发分析所有文件 + auto results = analyzer.analyze_batch_concurrent( + files, + get_video_prompt() , // 使用视频提示词 + 5, // max_frames + 1, // frame_interval + 3 // 并发数,根据API限制调整 + ); + + auto end_time = std::chrono::high_resolution_clock::now(); + double total_time = std::chrono::duration(end_time - start_time).count(); + + // 输出结果 + int success_count = 0; + int image_count = 0; + int video_count = 0; + double total_processing_time = 0; + + for (size_t i = 0; i < results.size(); ++i) { + const auto& result = results[i]; + std::string extension = utils::get_file_extension(result.filename); + + if (extension == ".jpg" || extension == ".jpeg" || extension == ".png") { + image_count++; + } else { + video_count++; + } + + std::cout << "\n============================================================" << std::endl; + std::cout << "📊 分析第 " << (i+1) << "/" << results.size() << " 个文件: " + << utils::get_filename(result.filename) << std::endl; + + if (result.success) { + success_count++; + total_processing_time += result.processing_time; + + std::cout << "✅ 分析成功!" 
<< std::endl; + std::cout << "⏱️ 响应时间: " << result.processing_time << "秒" << std::endl; + std::cout << "📝 分析结果: " << result.result << std::endl; + + if (!result.tags.empty()) { + std::cout << "🏷️ 提取标签: "; + for (size_t j = 0; j < result.tags.size(); ++j) { + std::cout << result.tags[j]; + if (j < result.tags.size() - 1) std::cout << ", "; + } + std::cout << std::endl; + } + } else { + std::cout << "❌ 分析失败: " << result.error_message << std::endl; + } + } + + // 输出统计信息 + std::cout << "\n📊 并发分析统计:" << std::endl; + std::cout << " 总文件数: " << results.size() << std::endl; + std::cout << " 成功分析: " << success_count << "/" << results.size() << std::endl; + std::cout << " 图片文件: " << image_count << std::endl; + std::cout << " 视频文件: " << video_count << std::endl; + std::cout << "⏱️ 总耗时: " << total_time << "秒" << std::endl; + std::cout << " 平均响应时间: " << (success_count > 0 ? total_processing_time / success_count : 0) << "秒" << std::endl; + std::cout << " 并发加速比: " << (success_count > 0 ? total_processing_time / total_time : 0) << "倍" << std::endl; + + } catch (const std::exception& e) { + std::cerr << "❌ 程序异常: " << e.what() << std::endl; + } + + } + + // 保存结果 + if (!output_path.empty() && !results.empty()) + { + try + { + nlohmann::json output_json = nlohmann::json::array(); + for (const auto &result : results) + { + output_json.push_back(result.raw_response); + } + + std::ofstream file(output_path); + file << output_json.dump(2) << std::endl; + std::cout << "\n💾 结果已保存到: " << output_path << std::endl; + } + catch (const std::exception &e) + { + std::cout << "❌ 保存结果失败: " << e.what() << std::endl; + } + } + + // 统计信息 + if (!results.empty()) + { + print_statistics(results); + } + + return 0; +} diff --git a/third_party/cpp_analyzelib/src/utils.cpp b/third_party/cpp_analyzelib/src/utils.cpp new file mode 100644 index 0000000..2b6751e --- /dev/null +++ b/third_party/cpp_analyzelib/src/utils.cpp @@ -0,0 +1,276 @@ +#include "../include/utils.hpp" +#include "../include/config.hpp" 
+#include +#include +#include +#include +#include +#include + +namespace utils { + +// 字符串工具 +std::string to_lower(const std::string& str) { + std::string result = str; + std::transform(result.begin(), result.end(), result.begin(), ::tolower); + return result; +} + +std::vector split(const std::string& str, char delimiter) { + std::vector tokens; + std::stringstream ss(str); + std::string token; + + while (std::getline(ss, token, delimiter)) { + if (!token.empty()) { + tokens.push_back(token); + } + } + + return tokens; +} + +std::string trim(const std::string& str) { + return trim(str, " \t\n\r"); +} + +std::string trim(const std::string& str, const std::string& chars_to_trim) { + size_t start = str.find_first_not_of(chars_to_trim); + if (start == std::string::npos) return ""; + + size_t end = str.find_last_not_of(chars_to_trim); + return str.substr(start, end - start + 1); +} + +bool starts_with(const std::string& str, const std::string& prefix) { + return str.size() >= prefix.size() && + str.compare(0, prefix.size(), prefix) == 0; +} + +bool ends_with(const std::string& str, const std::string& suffix) { + return str.size() >= suffix.size() && + str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; +} + +// 文件工具 +bool file_exists(const std::string& path) { + return std::filesystem::exists(path); +} + +std::string get_file_extension(const std::string& path) { + std::filesystem::path p(path); + return p.extension().string(); +} + +bool is_image_file(const std::string& path) { + std::string ext = to_lower(get_file_extension(path)); + return std::find(config::IMAGE_EXTENSIONS.begin(), + config::IMAGE_EXTENSIONS.end(), ext) != config::IMAGE_EXTENSIONS.end(); +} + +bool is_video_file(const std::string& path) { + std::string ext = to_lower(get_file_extension(path)); + return std::find(config::VIDEO_EXTENSIONS.begin(), + config::VIDEO_EXTENSIONS.end(), ext) != config::VIDEO_EXTENSIONS.end(); +} + +std::vector find_media_files(const std::string& folder, + 
const std::string& file_type, + int max_files) { + std::vector files; + + try { + for (const auto& entry : std::filesystem::directory_iterator(folder)) { + if (files.size() >= max_files) break; + + if (entry.is_regular_file()) { + std::string path = entry.path().string(); + + if (file_type == "all") { + if (is_image_file(path) || is_video_file(path)) { + files.push_back(path); + } + } else if (file_type == "image" && is_image_file(path)) { + files.push_back(path); + } else if (file_type == "video" && is_video_file(path)) { + files.push_back(path); + } + } + } + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << "Error accessing folder: " << e.what() << std::endl; + } + + return files; +} + +// Base64编码 +std::string base64_encode(const std::vector& data) { + static const std::string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + std::string encoded; + int i = 0; + int j = 0; + unsigned char char_array_3[3]; + unsigned char char_array_4[4]; + + for (const auto& byte : data) { + char_array_3[i++] = byte; + if (i == 3) { + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for(i = 0; i < 4; i++) { + encoded += base64_chars[char_array_4[i]]; + } + i = 0; + } + } + + if (i > 0) { + for(j = i; j < 3; j++) { + char_array_3[j] = '\0'; + } + + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for (j = 0; j < i + 1; j++) { + encoded += base64_chars[char_array_4[j]]; + } + + while(i++ < 3) { + encoded += '='; + } + } + + return encoded; +} + +std::string 
base64_encode_file(const std::string& file_path) { + std::ifstream file(file_path, std::ios::binary); + if (!file) { + throw std::runtime_error("Cannot open file: " + file_path); + } + + std::vector buffer(std::istreambuf_iterator(file), {}); + return base64_encode(buffer); +} + +// 图像处理 +std::vector encode_image_to_jpeg(const cv::Mat& image, int quality) { + std::vector buffer; + std::vector params = {cv::IMWRITE_JPEG_QUALITY, quality}; + cv::imencode(".jpg", image, buffer, params); + return buffer; +} + +cv::Mat resize_image(const cv::Mat& image, int max_size) { + int height = image.rows; + int width = image.cols; + + if (std::max(height, width) <= max_size) { + return image.clone(); + } + + double scale = static_cast(max_size) / std::max(height, width); + int new_width = static_cast(width * scale); + int new_height = static_cast(height * scale); + + cv::Mat resized; + cv::resize(image, resized, cv::Size(new_width, new_height)); + return resized; +} + +// JSON工具 +nlohmann::json parse_json(const std::string& json_str) { + return nlohmann::json::parse(json_str); +} + +std::string json_to_string(const nlohmann::json& j) { + return j.dump(); +} + +// 标签提取 +std::vector extract_tags(const std::string& content) { + std::vector tags; + + try { + // 查找数组格式 ['tag1', 'tag2'] + size_t start = content.find("['"); + size_t end = content.find("']"); + + if (start != std::string::npos && end != std::string::npos && start < end) { + std::string tags_str = content.substr(start + 2, end - start - 2); + auto temp_tags = split(tags_str, ','); + + for (const auto& tag : temp_tags) { + std::string clean_tag = trim(tag); + clean_tag = trim(clean_tag, "'\""); + if (!clean_tag.empty()) { + tags.push_back(clean_tag); + } + } + + if (!tags.empty()) return tags; + } + + // 正则表达式匹配其他格式 + std::regex pattern1(R"(标签[::]\s*([^。,!?!?]+))"); + std::regex pattern2(R"(['"]([^'"]+)['"])"); + std::regex pattern3(R"(([^,,、]+?)(?=,|,|、|$))"); + + std::smatch matches; + + if (std::regex_search(content, 
matches, pattern1) && matches.size() > 1) { + auto temp_tags = split(matches[1].str(), ','); + for (const auto& tag : temp_tags) { + std::string clean_tag = trim(tag); + if (!clean_tag.empty()) { + tags.push_back(clean_tag); + } + } + } + + // 去重并限制数量 + std::sort(tags.begin(), tags.end()); + tags.erase(std::unique(tags.begin(), tags.end()), tags.end()); + + if (tags.size() > 5) { + tags.resize(5); + } + + } catch (const std::exception& e) { + std::cerr << "Error extracting tags: " << e.what() << std::endl; + } + + return tags; +} + +// 时间工具 +double get_current_time() { + auto now = std::chrono::steady_clock::now(); + return std::chrono::duration(now.time_since_epoch()).count(); +} + +void sleep_seconds(int seconds) { + std::this_thread::sleep_for(std::chrono::seconds(seconds)); +} + +// 或者更兼容的版本 +std::string get_filename(const std::string& filepath) { + size_t last_slash = filepath.find_last_of("/\\"); + if (last_slash != std::string::npos) { + return filepath.substr(last_slash + 1); + } + return filepath; +} + +} // namespace utils diff --git a/third_party/cpp_analyzelib/test/test.jpg b/third_party/cpp_analyzelib/test/test.jpg new file mode 100644 index 0000000..d88738c Binary files /dev/null and b/third_party/cpp_analyzelib/test/test.jpg differ diff --git a/third_party/cpp_analyzelib/test/test_config.cpp b/third_party/cpp_analyzelib/test/test_config.cpp new file mode 100644 index 0000000..a606c01 --- /dev/null +++ b/third_party/cpp_analyzelib/test/test_config.cpp @@ -0,0 +1,71 @@ +#include +#include +#include "DoubaoMediaAnalyzer.hpp" +#include "utils.hpp" + +void test_basic_functionality() { + std::cout << "🧪 测试基本功能..." 
<< std::endl; + + // 测试文件工具 + std::string test_file = "test/test.jpg"; + if (utils::file_exists(test_file)) { + std::cout << "✅ 文件存在检查: 通过" << std::endl; + } else { + std::cout << "❌ 文件存在检查: 失败" << std::endl; + } + + // 测试Base64编码 + std::vector test_data = {'H', 'e', 'l', 'l', 'o'}; + std::string encoded = utils::base64_encode(test_data); + std::cout << "✅ Base64编码测试: " << encoded << std::endl; + + // 测试字符串工具 + std::string test_str = " Hello World "; + std::string trimmed = utils::trim(test_str); + std::cout << "✅ 字符串修剪测试: '" << trimmed << "'" << std::endl; + + std::cout << "✅ 基本功能测试完成" << std::endl; +} + +void test_opencv() { + std::cout << "🧪 测试OpenCV功能..." << std::endl; + + // 创建一个测试图像 + cv::Mat test_image(100, 100, CV_8UC3, cv::Scalar(255, 0, 0)); + + // 测试图像编码 + auto jpeg_data = utils::encode_image_to_jpeg(test_image, 85); + if (!jpeg_data.empty()) { + std::cout << "✅ 图像编码测试: 通过 (" << jpeg_data.size() << " bytes)" << std::endl; + } else { + std::cout << "❌ 图像编码测试: 失败" << std::endl; + } + + // 测试图像缩放 + cv::Mat resized = utils::resize_image(test_image, 50); + if (resized.cols <= 50 && resized.rows <= 50) { + std::cout << "✅ 图像缩放测试: 通过 (" << resized.cols << "x" << resized.rows << ")" << std::endl; + } else { + std::cout << "❌ 图像缩放测试: 失败" << std::endl; + } + + std::cout << "✅ OpenCV功能测试完成" << std::endl; +} + +int main() { + std::cout << "🚀 开始豆包分析器功能测试..." << std::endl; + + try { + test_basic_functionality(); + test_opencv(); + + std::cout << "\n🎉 所有测试完成!" << std::endl; + std::cout << "💡 提示: 运行完整测试需要配置API密钥" << std::endl; + + } catch (const std::exception& e) { + std::cout << "❌ 测试失败: " << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/third_party/cpp_llmlib/llmlib_openai_test.cpp b/third_party/cpp_llmlib/llmlib_openai_test.cpp index 381bf36..144495c 100644 --- a/third_party/cpp_llmlib/llmlib_openai_test.cpp +++ b/third_party/cpp_llmlib/llmlib_openai_test.cpp @@ -8,11 +8,11 @@ using namespace llmlib; * 2. 
api key */ int main() { - LLMClient llmcli("https://dashscope.aliyuncs.com/compatible-mode/v1", "sk-XXX", LLMBackendType::OpenAI); + LLMClient llmcli("https://ark.cn-beijing.volces.com/api/v3", "sk_key", LLMBackendType::OpenAI); /** * 1. test for simple chat */ - auto res1 = llmcli.simple_chat("qwen2.5-vl-7b-instruct", "坐飞机为什么要付机票钱?", {}, {{"temperature", 0.1}, {"top_k", 1}}); + auto res1 = llmcli.simple_chat("doubao-1.5-vision-lite-250315", "坐飞机为什么要付机票钱?", {}, {{"temperature", 0.1}, {"top_k", 1}}); std::cout << "------test for simple chat-----" << std::endl; std::cout << res1 << std::endl; std::cout << "-------------------------------" << std::endl; @@ -20,9 +20,9 @@ int main() { /** * 2. test for simple chat with images */ - auto image1 = cv::imread("/windows2/zhzhi/github/vp_data/test_images/vehicle/0.jpg"); - auto image2 = cv::imread("/windows2/zhzhi/github/vp_data/test_images/vehicle/27.jpg"); - auto res2 = llmcli.simple_chat("qwen2.5-vl-7b-instruct", "描述这两幅图片的差异", {image1, image2}, {}); + auto image1 = cv::imread("./vp_data/test_images/vehicle/0.jpg"); + auto image2 = cv::imread("./vp_data/test_images/vehicle/27.jpg"); + auto res2 = llmcli.simple_chat("doubao-1.5-vision-lite-250315", "描述这两幅图片的差异", {image1, image2}, {}); std::cout << "-----test for simple chat with images-----" << std::endl; std::cout << res2 << std::endl; std::cout << "------------------------------------------" << std::endl; @@ -48,7 +48,8 @@ int main() { "top_k": 2 } )"_json; - auto res3 = llmcli.chat("qwen2.5-vl-7b-instruct", messages, options); + auto res3 = llmcli.chat("doubao-1.5-vision-lite-250315", messages, options); + std::cout << "---------test for chat---------" << std::endl; std::cout << res3 << std::endl; std::cout << "-------------------------------" << std::endl; @@ -72,7 +73,7 @@ int main() { }} } }; - auto res4 = llmcli.chat("qwen2.5-vl-7b-instruct", messages4, {{"temperature", 0.1}, {"top_k", 1}}); + auto res4 = llmcli.chat("doubao-1.5-vision-lite-250315", messages4, 
{{"temperature", 0.1}, {"top_k", 1}}); std::cout << "---------test for chat with images---------" << std::endl; std::cout << res4 << std::endl; std::cout << "-------------------------------------------" << std::endl; diff --git a/utils/config_reader.cpp b/utils/config_reader.cpp new file mode 100644 index 0000000..9c4b828 --- /dev/null +++ b/utils/config_reader.cpp @@ -0,0 +1,72 @@ +#include "config_reader.h" +#include +#include +#include +#include + +bool ConfigReader::loadConfig(const std::string& configPath) { + std::ifstream file(configPath); + if (!file.is_open()) { + return false; + } + + configData_.clear(); + std::string currentSection; + std::string line; + + while (std::getline(file, line)) { + trim(line); + + // 跳过空行和注释 + if (line.empty() || line[0] == ';' || line[0] == '#') { + continue; + } + + // 处理节头 [section] + if (line[0] == '[' && line[line.length() - 1] == ']') { + currentSection = line.substr(1, line.length() - 2); + trim(currentSection); + continue; + } + + // 处理键值对 + size_t equalsPos = line.find('='); + if (equalsPos != std::string::npos) { + std::string key = line.substr(0, equalsPos); + std::string value = line.substr(equalsPos + 1); + + trim(key); + trim(value); + + if (!currentSection.empty() && !key.empty()) { + configData_[currentSection][key] = value; + } + } + } + + file.close(); + return true; +} + +std::string ConfigReader::getValue(const std::string& section, const std::string& key, const std::string& defaultValue) { + auto sectionIt = configData_.find(section); + if (sectionIt != configData_.end()) { + auto keyIt = sectionIt->second.find(key); + if (keyIt != sectionIt->second.end()) { + return keyIt->second; + } + } + return defaultValue; +} + +void ConfigReader::trim(std::string& str) { + // 去除左侧空格 + str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](unsigned char ch) { + return !std::isspace(ch); + })); + + // 去除右侧空格 + str.erase(std::find_if(str.rbegin(), str.rend(), [](unsigned char ch) { + return !std::isspace(ch); 
+ }).base(), str.end()); +} diff --git a/utils/config_reader.h b/utils/config_reader.h new file mode 100644 index 0000000..7394791 --- /dev/null +++ b/utils/config_reader.h @@ -0,0 +1,26 @@ +#ifndef CONFIG_READER_H +#define CONFIG_READER_H + +#include +#include + +class ConfigReader { +public: + static ConfigReader& getInstance() { + static ConfigReader instance; + return instance; + } + + bool loadConfig(const std::string& configPath); + std::string getValue(const std::string& section, const std::string& key, const std::string& defaultValue = ""); + +private: + ConfigReader() = default; + ~ConfigReader() = default; + + std::unordered_map> configData_; + + void trim(std::string& str); +}; + +#endif // CONFIG_READER_H diff --git a/vp_analysis_board.png b/vp_analysis_board.png new file mode 100644 index 0000000..6e4a16f Binary files /dev/null and b/vp_analysis_board.png differ