22#define COMMON_ARG_PARSER_H
33
44#include < iostream>
5+ #include < fstream>
56#include < string>
67#include < algorithm>
78#include < map>
@@ -22,6 +23,7 @@ class CommonArgParser {
2223 std::vector<std::string> inputFiles;
2324 std::map<std::string, std::set<std::string>> onlyValueFilters;
2425 std::map<std::string, std::set<std::string>> excludeValueFilters;
26+ std::map<std::string, std::set<std::string>> allowedValues;
2527 std::set<std::string> notEmptyColumns;
2628 bool removeEmptyJson;
2729 bool removeErrors;
@@ -134,6 +136,51 @@ class CommonArgParser {
134136 std::cerr << " Error: --exclude-value requires an argument" << std::endl;
135137 return false ;
136138 }
139+ } else if (arg == " --allowed-values" ) {
140+ if (i + 2 < argc) {
141+ ++i;
142+ std::string column = argv[i];
143+ ++i;
144+ std::string valuesArg = argv[i];
145+ // Check if valuesArg is a file or comma-separated values
146+ std::ifstream file (valuesArg);
147+ if (file.good ()) {
148+ // It's a file - read values line by line
149+ std::string line;
150+ while (std::getline (file, line)) {
151+ // Trim whitespace
152+ size_t start = line.find_first_not_of (" \t\r\n " );
153+ size_t end = line.find_last_not_of (" \t\r\n " );
154+ if (start != std::string::npos && end != std::string::npos) {
155+ allowedValues[column].insert (line.substr (start, end - start + 1 ));
156+ }
157+ }
158+ } else {
159+ // Treat as comma-separated values
160+ size_t pos = 0 ;
161+ while (pos < valuesArg.length ()) {
162+ size_t commaPos = valuesArg.find (' ,' , pos);
163+ if (commaPos == std::string::npos) {
164+ commaPos = valuesArg.length ();
165+ }
166+ std::string val = valuesArg.substr (pos, commaPos - pos);
167+ // Trim whitespace
168+ size_t start = val.find_first_not_of (" \t " );
169+ size_t end = val.find_last_not_of (" \t " );
170+ if (start != std::string::npos && end != std::string::npos) {
171+ allowedValues[column].insert (val.substr (start, end - start + 1 ));
172+ }
173+ pos = commaPos + 1 ;
174+ }
175+ }
176+ if (allowedValues[column].empty ()) {
177+ std::cerr << " Error: --allowed-values requires at least one value" << std::endl;
178+ return false ;
179+ }
180+ } else {
181+ std::cerr << " Error: --allowed-values requires <column> and <values|file>" << std::endl;
182+ return false ;
183+ }
137184 } else if (arg == " -c" || arg == " --column" ) {
138185 // Skip this flag and its argument - handled by StatsAnalyser
139186 if (i + 1 < argc) {
@@ -208,6 +255,7 @@ class CommonArgParser {
208255 const std::vector<std::string>& getInputFiles () const { return inputFiles; }
209256 const std::map<std::string, std::set<std::string>>& getOnlyValueFilters () const { return onlyValueFilters; }
210257 const std::map<std::string, std::set<std::string>>& getExcludeValueFilters () const { return excludeValueFilters; }
258+ const std::map<std::string, std::set<std::string>>& getAllowedValues () const { return allowedValues; }
211259 const std::set<std::string>& getNotEmptyColumns () const { return notEmptyColumns; }
212260 bool getRemoveEmptyJson () const { return removeEmptyJson; }
213261 bool getRemoveErrors () const { return removeErrors; }
@@ -231,15 +279,16 @@ class CommonArgParser {
231279
232280 // Common filtering options
233281 static const std::set<std::string> filterOptions = {
234- " --not-empty" , " --only-value" , " --exclude-value" ,
282+ " --not-empty" , " --only-value" , " --exclude-value" , " --allowed-values " ,
235283 " --remove-errors" , " --remove-empty-json" , " --clean"
236284 };
237285
238286 // Options that take arguments (need to skip the next arg)
287+ // Note: --allowed-values takes TWO args but we handle that specially
239288 static const std::set<std::string> optionsWithArgs = {
240289 " -if" , " --input-format" , " -e" , " --extension" , " -d" , " --depth" ,
241290 " --min-date" , " --max-date" , " --not-empty" , " --only-value" ,
242- " --exclude-value" , " -o" , " --output" , " -of" , " --output-format" ,
291+ " --exclude-value" , " --allowed-values " , " - o" , " --output" , " -of" , " --output-format" ,
243292 " -c" , " --column"
244293 };
245294
0 commit comments