Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 51 additions & 42 deletions src/spider/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#include <chrono>
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <system_error>
#include <thread>

#include <boost/any/bad_any_cast.hpp>
#include <boost/program_options/errors.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
Expand Down Expand Up @@ -38,33 +38,62 @@ constexpr int cCleanupInterval = 5;
constexpr int cRetryCount = 5;

namespace {
auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map {

char const* const cUsage
= "Usage: spider_scheduler --host <host> --port <port> --storage_url <url>";

auto parse_args(
int const argc,
char** argv,
std::string& host,
unsigned short& port,
std::string& storage_url
) -> bool {
boost::program_options::options_description desc;
desc.add_options()("help", "spider scheduler");
desc.add_options()(
// clang-format off
desc.add_options()
("help", "spider scheduler")
(
"host",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's replace these argument names with constants since we're using them in multiple places.

boost::program_options::value<std::string>(),
boost::program_options::value<std::string>(&host)->required(),
"scheduler host address"
);
desc.add_options()(
)
(
"port",
boost::program_options::value<unsigned short>(),
boost::program_options::value<unsigned short>(&port)->required(),
"port to listen on"
);
desc.add_options()(
)
(
"storage_url",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a hyphen instead of the underscore (i.e., --storage-url)?

boost::program_options::value<std::string>(),
boost::program_options::value<std::string>(&storage_url)->required(),
"storage server url"
);

boost::program_options::variables_map variables;
boost::program_options::store(
// NOLINTNEXTLINE(misc-include-cleaner)
boost::program_options::parse_command_line(argc, argv, desc),
variables
);
boost::program_options::notify(variables);
return variables;
);
// clang-format on

try {
boost::program_options::variables_map variables;
boost::program_options::store(
// NOLINTNEXTLINE(misc-include-cleaner)
boost::program_options::parse_command_line(argc, argv, desc),
variables
);

if (!variables.contains("host") && !variables.contains("port")
&& !variables.contains("storage_url"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Boost's program-options already validates the presence of these arguments since we marked them required.
  • We should validate that the user doesn't pass in an empty value for host and storage-url. If they do, we should print a specific error message for each one.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Boost's program-options validation is not flexible enough. For example, if the user does not pass any arguments or only provides --help, Boost cannot print the help message correctly; instead, it treats the required arguments as missing and throws an exception.
  • I'll add more validation for arguments.

{
std::cout << cUsage << "\n";
std::cout << desc << "\n";
return false;
}

boost::program_options::notify(variables);
return true;
} catch (boost::program_options::error& e) {
std::cerr << "spider_scheduler: " << e.what() << "\n";
std::cerr << cUsage << "\n";
std::cerr << "Try 'spider_scheduler --help' for more information.\n";
return false;
}
}

auto heartbeat_loop(
Expand Down Expand Up @@ -137,30 +166,10 @@ auto main(int argc, char** argv) -> int {
spdlog::set_level(spdlog::level::trace);
#endif

boost::program_options::variables_map const args = parse_args(argc, argv);

unsigned short port = 0;
std::string scheduler_addr;
std::string storage_url;
try {
if (!args.contains("port")) {
spdlog::error("port is required");
return cCmdArgParseErr;
}
port = args["port"].as<unsigned short>();
if (!args.contains("host")) {
spdlog::error("host is required");
return cCmdArgParseErr;
}
scheduler_addr = args["host"].as<std::string>();
if (!args.contains("storage_url")) {
spdlog::error("storage_url is required");
return cCmdArgParseErr;
}
storage_url = args["storage_url"].as<std::string>();
} catch (boost::bad_any_cast& e) {
return cCmdArgParseErr;
} catch (boost::program_options::error& e) {
if (!parse_args(argc, argv, scheduler_addr, port, storage_url)) {
return cCmdArgParseErr;
}

Expand Down
97 changes: 54 additions & 43 deletions src/spider/worker/worker.cpp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar comments as on the other file.

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <stdexcept>
Expand All @@ -11,7 +12,6 @@
#include <vector>

#include <absl/container/flat_hash_map.h>
#include <boost/any/bad_any_cast.hpp>
#include <boost/dll/runtime_symbol_info.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/process/v2/environment.hpp>
Expand Down Expand Up @@ -50,29 +50,62 @@ constexpr int cTaskErr = 5;
constexpr int cRetryCount = 5;

namespace {
auto parse_args(int const argc, char** argv) -> boost::program_options::variables_map {

char const* const cUsage
= "Usage: spider_worker --host <host> --storage_url <storage_url> --libs <libs>";

auto parse_args(
int const argc,
char** argv,
std::string& host,
std::string& storage_url,
std::vector<std::string>& libs
) -> bool {
boost::program_options::options_description desc;
desc.add_options()("help", "spider scheduler");
desc.add_options()(
"storage_url",
boost::program_options::value<std::string>(),
// clang-format off
desc.add_options()
("help", "spider scheduler")
(
"host",
boost::program_options::value<std::string>(&host)->required(),
"worker host address"
)
(
"storage_url",
boost::program_options::value<std::string>(&storage_url)->required(),
"storage server url"
);
desc.add_options()(
)
(
"libs",
boost::program_options::value<std::vector<std::string>>(),
boost::program_options::value<std::vector<std::string>>(&libs),
"dynamic libraries that include the spider tasks"
);
desc.add_options()("host", boost::program_options::value<std::string>(), "worker host address");

boost::program_options::variables_map variables;
boost::program_options::store(
// NOLINTNEXTLINE(misc-include-cleaner)
boost::program_options::parse_command_line(argc, argv, desc),
variables
);
boost::program_options::notify(variables);
return variables;
);
// clang-format on

try {
boost::program_options::variables_map variables;
boost::program_options::store(
// NOLINTNEXTLINE(misc-include-cleaner)
boost::program_options::parse_command_line(argc, argv, desc),
variables
);

if (!variables.contains("host") && !variables.contains("storage_url")
&& !variables.contains("libs"))
{
std::cout << cUsage << "\n";
std::cout << desc << "\n";
return false;
}

boost::program_options::notify(variables);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Move notify() call before validation checks.

The notify() call should happen before the validation checks to ensure that required arguments are properly validated by boost::program_options.

+        boost::program_options::notify(variables);
+
         if (!variables.contains(std::string(spider::core::cHostOption))
             && !variables.contains(std::string(spider::core::cStorageUrlOption))
             && !variables.contains(std::string(spider::core::cLibsOption)))
         {
             std::cout << spider::core::cWorkerUsage << "\n";
             std::cout << desc << "\n";
             return false;
         }
-
-        boost::program_options::notify(variables);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
boost::program_options::notify(variables);
// Ensure all semantic actions and required-option checks run before manual validation
boost::program_options::notify(variables);
if (!variables.contains(std::string(spider::core::cHostOption))
&& !variables.contains(std::string(spider::core::cStorageUrlOption))
&& !variables.contains(std::string(spider::core::cLibsOption)))
{
std::cout << spider::core::cWorkerUsage << "\n";
std::cout << desc << "\n";
return false;
}
🤖 Prompt for AI Agents
In src/spider/worker/worker.cpp at line 126, move the call to
boost::program_options::notify(variables) to occur before any validation checks
on the program options. This ensures that required arguments are properly
processed and validated by boost::program_options before custom validation logic
runs.

return true;
} catch (boost::program_options::error& e) {
std::cerr << "spider_worker: " << e.what() << "\n";
std::cerr << cUsage << "\n";
std::cerr << "Try 'spider_worker --help' for more information.\n";
return false;
}
}

auto get_environment_variable() -> absl::flat_hash_map<
Expand Down Expand Up @@ -329,32 +362,10 @@ auto main(int argc, char** argv) -> int {
spdlog::set_level(spdlog::level::trace);
#endif

boost::program_options::variables_map const args = parse_args(argc, argv);

std::string storage_url;
std::vector<std::string> libs;
std::string worker_addr;
try {
if (!args.contains("storage_url")) {
spdlog::error("Missing storage_url");
return cCmdArgParseErr;
}
storage_url = args["storage_url"].as<std::string>();
if (!args.contains("host")) {
spdlog::error("Missing host");
return cCmdArgParseErr;
}
worker_addr = args["host"].as<std::string>();
if (!args.contains("libs") || args["libs"].empty()) {
spdlog::error("Missing libs");
return cCmdArgParseErr;
}
libs = args["libs"].as<std::vector<std::string>>();
} catch (boost::bad_any_cast const& e) {
spdlog::error("Error: {}", e.what());
return cCmdArgParseErr;
} catch (boost::program_options::error const& e) {
spdlog::error("Error: {}", e.what());
if (!parse_args(argc, argv, worker_addr, storage_url, libs)) {
return cCmdArgParseErr;
}

Expand Down
Loading