Skip to content

Commit dc72218

Browse files
committed
--not-null
1 parent ff35896 commit dc72218

File tree

8 files changed

+145
-10
lines changed

8 files changed

+145
-10
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,10 @@ sensor-data transform -r -e .out /path/to/logs/ output.csv
8989
- `--remove-whitespace` - Remove extra whitespace from output (compact format)
9090
- `--remove-empty-json` - Remove empty JSON input lines (e.g., `[{}]`, `[]`)
9191
- `--not-empty <column>` - Skip rows where column is empty
92+
- `--not-null <column>` - Skip rows where column is exactly the literal string "null" or contains an ASCII NUL character
9293
- `--only-value <col:val>` - Only include rows where column equals value
9394
- `--allowed-values <column> <values|file>` - Only include rows where column is in allowed values
94-
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors`
95+
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors --not-null value --not-null sensor_id`
9596
- `--tail <n>` - Only read the last n lines from each file
9697
- `-v` - Verbose output
9798
- `-V` - Very verbose output
@@ -198,9 +199,10 @@ sensor-data stats --clean input.out
198199
- `--exclude-value <col:val>` - Exclude rows where column equals value
199200
- `--allowed-values <column> <values|file>` - Only include rows where column is in allowed values
200201
- `--not-empty <column>` - Skip rows where column is empty
202+
- `--not-null <column>` - Skip rows where column is exactly the literal string "null" or contains an ASCII NUL character
201203
- `--remove-empty-json` - Remove empty JSON input lines
202204
- `--remove-errors` - Remove error readings (DS18B20 value=85 or -127)
203-
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors`
205+
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors --not-null value --not-null sensor_id`
204206
- `--tail <n>` - Only read the last n lines from each file
205207
- `-r, --recursive` - Recursively process subdirectories
206208
- `-v` - Verbose output

debian/sensor-data.1

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ Maximum recursion depth (0 = current dir only).
150150
.BI \-\-not\-empty " column"
151151
Skip rows where column is empty (can be used multiple times).
152152
.TP
153+
.BI \-\-not\-null " column"
154+
Skip rows where column is the literal string "null" or contains an ASCII NUL character (can be used multiple times).
155+
.TP
153156
.BI \-\-only\-value " col:val"
154157
Only include rows where column has specific value (can be used multiple times).
155158
.TP
@@ -168,7 +171,7 @@ Remove error readings (DS18B20 value=85 or \-127).
168171
Remove empty JSON input lines (e.g., [{}], []).
169172
.TP
170173
.B \-\-clean
171-
Shorthand for \-\-remove\-empty\-json \-\-not\-empty value \-\-remove\-errors.
174+
Shorthand for \-\-remove\-empty\-json \-\-not\-empty value \-\-remove\-errors \-\-not\-null value \-\-not\-null sensor_id.
172175
.TP
173176
.BI \-\-min\-date " date"
174177
Filter readings after this date.
@@ -228,14 +231,17 @@ or a file path with one value per line.
228231
.BI \-\-not\-empty " column"
229232
Skip rows where column is empty (can be used multiple times).
230233
.TP
234+
.BI \-\-not\-null " column"
235+
Skip rows where column is the literal string "null" or contains an ASCII NUL character (can be used multiple times).
236+
.TP
231237
.B \-\-remove\-empty\-json
232238
Remove empty JSON input lines (e.g., [{}], []).
233239
.TP
234240
.B \-\-remove\-errors
235241
Remove error readings (DS18B20 value=85 or \-127).
236242
.TP
237243
.B \-\-clean
238-
Shorthand for \-\-remove\-empty\-json \-\-not\-empty value \-\-remove\-errors.
244+
Shorthand for \-\-remove\-empty\-json \-\-not\-empty value \-\-remove\-errors \-\-not\-null value \-\-not\-null sensor_id.
239245
.TP
240246
.BR \-r ", " \-\-recursive
241247
Recursively process subdirectories.

include/command_base.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class CommandBase {
4343
bool removeErrors;
4444
bool removeEmptyJson;
4545
std::set<std::string> notEmptyColumns;
46+
std::set<std::string> notNullColumns;
4647
std::map<std::string, std::set<std::string>> onlyValueFilters;
4748
std::map<std::string, std::set<std::string>> excludeValueFilters;
4849
std::map<std::string, std::set<std::string>> allowedValues;
@@ -113,6 +114,21 @@ class CommandBase {
113114
}
114115
}
115116

117+
// Check if any required columns contain null values
118+
for (const auto& reqCol : notNullColumns) {
119+
auto it = reading.find(reqCol);
120+
if (it != reading.end()) {
121+
const std::string& val = it->second;
122+
// Check for literal "null" string or ASCII null character
123+
if (val == "null" || val.find('\0') != std::string::npos) {
124+
if (verbosity >= 2) {
125+
std::cerr << " Skipping row: null value in column '" << reqCol << "'" << std::endl;
126+
}
127+
return false;
128+
}
129+
}
130+
}
131+
116132
// Check value filters (include)
117133
for (const auto& filter : onlyValueFilters) {
118134
auto it = reading.find(filter.first);
@@ -186,6 +202,7 @@ class CommandBase {
186202
excludeValueFilters = parser.getExcludeValueFilters();
187203
allowedValues = parser.getAllowedValues();
188204
notEmptyColumns = parser.getNotEmptyColumns();
205+
notNullColumns = parser.getNotNullColumns();
189206
removeEmptyJson = parser.getRemoveEmptyJson();
190207
removeErrors = parser.getRemoveErrors();
191208
tailLines = parser.getTailLines();

include/common_arg_parser.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class CommonArgParser {
2828
std::map<std::string, std::set<std::string>> excludeValueFilters;
2929
std::map<std::string, std::set<std::string>> allowedValues;
3030
std::set<std::string> notEmptyColumns;
31+
std::set<std::string> notNullColumns;
3132
bool removeEmptyJson;
3233
bool removeErrors;
3334
int tailLines; // --tail <n>: only read last n lines from each file
@@ -117,15 +118,25 @@ class CommonArgParser {
117118
std::cerr << "Error: --not-empty requires an argument" << std::endl;
118119
return false;
119120
}
121+
} else if (arg == "--not-null") {
122+
if (i + 1 < argc) {
123+
++i;
124+
notNullColumns.insert(argv[i]);
125+
} else {
126+
std::cerr << "Error: --not-null requires an argument" << std::endl;
127+
return false;
128+
}
120129
} else if (arg == "--remove-empty-json") {
121130
removeEmptyJson = true;
122131
} else if (arg == "--remove-errors") {
123132
removeErrors = true;
124133
} else if (arg == "--clean") {
125-
// --clean expands to --remove-empty-json --not-empty value --remove-errors
134+
// --clean expands to --remove-empty-json --not-empty value --remove-errors --not-null value --not-null sensor_id
126135
removeEmptyJson = true;
127136
notEmptyColumns.insert("value");
128137
removeErrors = true;
138+
notNullColumns.insert("value");
139+
notNullColumns.insert("sensor_id");
129140
} else if (arg == "--only-value") {
130141
if (i + 1 < argc) {
131142
++i;
@@ -279,6 +290,7 @@ class CommonArgParser {
279290
const std::map<std::string, std::set<std::string>>& getExcludeValueFilters() const { return excludeValueFilters; }
280291
const std::map<std::string, std::set<std::string>>& getAllowedValues() const { return allowedValues; }
281292
const std::set<std::string>& getNotEmptyColumns() const { return notEmptyColumns; }
293+
const std::set<std::string>& getNotNullColumns() const { return notNullColumns; }
282294
bool getRemoveEmptyJson() const { return removeEmptyJson; }
283295
bool getRemoveErrors() const { return removeErrors; }
284296
int getTailLines() const { return tailLines; }
@@ -302,15 +314,15 @@ class CommonArgParser {
302314

303315
// Common filtering options
304316
static const std::set<std::string> filterOptions = {
305-
"--not-empty", "--only-value", "--exclude-value", "--allowed-values",
317+
"--not-empty", "--not-null", "--only-value", "--exclude-value", "--allowed-values",
306318
"--remove-errors", "--remove-empty-json", "--clean"
307319
};
308320

309321
// Options that take arguments (need to skip the next arg)
310322
// Note: --allowed-values takes TWO args but we handle that specially
311323
static const std::set<std::string> optionsWithArgs = {
312324
"-if", "--input-format", "-e", "--extension", "-d", "--depth",
313-
"--min-date", "--max-date", "--not-empty", "--only-value",
325+
"--min-date", "--max-date", "--not-empty", "--not-null", "--only-value",
314326
"--exclude-value", "--allowed-values", "-o", "--output", "-of", "--output-format",
315327
"-c", "--column", "--tail"
316328
};

src/data_counter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,11 +423,12 @@ void DataCounter::printCountUsage(const char* progName) {
423423
std::cerr << " -e, --extension <ext> Filter files by extension (e.g., .out or out)" << std::endl;
424424
std::cerr << " -d, --depth <n> Maximum recursion depth (0 = current dir only)" << std::endl;
425425
std::cerr << " --not-empty <column> Skip rows where column is empty (can be used multiple times)" << std::endl;
426+
std::cerr << " --not-null <column> Skip rows where column is 'null' (can be used multiple times)" << std::endl;
426427
std::cerr << " --only-value <col:val> Only include rows where column has specific value" << std::endl;
427428
std::cerr << " --exclude-value <col:val> Exclude rows where column has specific value" << std::endl;
428429
std::cerr << " --remove-errors Remove error readings (DS18B20 value=85 or -127)" << std::endl;
429430
std::cerr << " --remove-empty-json Remove empty JSON input lines (e.g., [{}], [])" << std::endl;
430-
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors" << std::endl;
431+
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors --not-null value --not-null sensor_id" << std::endl;
431432
std::cerr << " --min-date <date> Filter readings after this date" << std::endl;
432433
std::cerr << " --max-date <date> Filter readings before this date" << std::endl;
433434
std::cerr << " --tail <n> Only read the last n lines from each file" << std::endl;

src/sensor_data_transformer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ void SensorDataTransformer::printListRejectsUsage(const char* progName) {
675675
std::cerr << " --allowed-values <col> <values|file> List rows where column is NOT in allowed list" << std::endl;
676676
std::cerr << " --remove-errors List error readings (DS18B20 value=85 or -127)" << std::endl;
677677
std::cerr << " --remove-empty-json List empty JSON input lines" << std::endl;
678-
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors" << std::endl;
678+
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors --not-null value --not-null sensor_id" << std::endl;
679679
std::cerr << " --min-date <date> List readings before this date" << std::endl;
680680
std::cerr << " --max-date <date> List readings after this date" << std::endl;
681681
std::cerr << std::endl;

src/stats_analyser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ void StatsAnalyser::printStatsUsage(const char* progName) {
601601
std::cerr << " --not-empty <col> Only include rows where column is not empty" << std::endl;
602602
std::cerr << " --remove-empty-json Remove rows with empty JSON objects" << std::endl;
603603
std::cerr << " --remove-errors Remove error readings (DS18B20 value=85 or -127)" << std::endl;
604-
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors" << std::endl;
604+
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors --not-null value --not-null sensor_id" << std::endl;
605605
std::cerr << " -r, --recursive Recursively process subdirectories" << std::endl;
606606
std::cerr << " -v Verbose output" << std::endl;
607607
std::cerr << " -V Very verbose output" << std::endl;

tests/test_count.sh

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,103 @@ else
612612
FAILED=$((FAILED + 1))
613613
fi
614614

615+
# Test: --not-null filters out "null" string values
616+
echo ""
617+
echo "Test: --not-null filters out literal 'null' string"
618+
result=$(cat <<'EOF' | ./sensor-data count --not-null value
619+
[{"sensor_id": "s1", "value": "10"}]
620+
[{"sensor_id": "s2", "value": "null"}]
621+
[{"sensor_id": "s3", "value": "20"}]
622+
EOF
623+
)
624+
if [ "$result" = "2" ]; then
625+
echo " ✓ PASS"
626+
PASSED=$((PASSED + 1))
627+
else
628+
echo " ✗ FAIL"
629+
echo " Expected: 2"
630+
echo " Got: $result"
631+
FAILED=$((FAILED + 1))
632+
fi
633+
634+
# Test: --not-null on sensor_id column
635+
echo ""
636+
echo "Test: --not-null filters null sensor_id"
637+
result=$(cat <<'EOF' | ./sensor-data count --not-null sensor_id
638+
[{"sensor_id": "s1", "value": "10"}]
639+
[{"sensor_id": "null", "value": "20"}]
640+
[{"sensor_id": "s3", "value": "30"}]
641+
EOF
642+
)
643+
if [ "$result" = "2" ]; then
644+
echo " ✓ PASS"
645+
PASSED=$((PASSED + 1))
646+
else
647+
echo " ✗ FAIL"
648+
echo " Expected: 2"
649+
echo " Got: $result"
650+
FAILED=$((FAILED + 1))
651+
fi
652+
653+
# Test: --not-null can be used multiple times
654+
echo ""
655+
echo "Test: --not-null can filter multiple columns"
656+
result=$(cat <<'EOF' | ./sensor-data count --not-null value --not-null sensor_id
657+
[{"sensor_id": "s1", "value": "10"}]
658+
[{"sensor_id": "null", "value": "20"}]
659+
[{"sensor_id": "s3", "value": "null"}]
660+
[{"sensor_id": "s4", "value": "40"}]
661+
EOF
662+
)
663+
if [ "$result" = "2" ]; then
664+
echo " ✓ PASS"
665+
PASSED=$((PASSED + 1))
666+
else
667+
echo " ✗ FAIL"
668+
echo " Expected: 2"
669+
echo " Got: $result"
670+
FAILED=$((FAILED + 1))
671+
fi
672+
673+
# Test: --clean now includes --not-null for value and sensor_id
674+
echo ""
675+
echo "Test: --clean filters null values in value and sensor_id"
676+
result=$(cat <<'EOF' | ./sensor-data count --clean
677+
[{"sensor_id": "s1", "value": "10"}]
678+
[{"sensor_id": "null", "value": "20"}]
679+
[{"sensor_id": "s3", "value": "null"}]
680+
[{"sensor_id": "s4", "value": ""}]
681+
EOF
682+
)
683+
if [ "$result" = "1" ]; then
684+
echo " ✓ PASS"
685+
PASSED=$((PASSED + 1))
686+
else
687+
echo " ✗ FAIL"
688+
echo " Expected: 1"
689+
echo " Got: $result"
690+
FAILED=$((FAILED + 1))
691+
fi
692+
693+
# Test: --not-null does not filter missing columns (only filters if column exists with null value)
694+
echo ""
695+
echo "Test: --not-null does not filter rows with missing column"
696+
result=$(cat <<'EOF' | ./sensor-data count --not-null value
697+
[{"sensor_id": "s1", "value": "10"}]
698+
[{"sensor_id": "s2"}]
699+
[{"sensor_id": "s3", "value": "null"}]
700+
EOF
701+
)
702+
if [ "$result" = "2" ]; then
703+
echo " ✓ PASS"
704+
PASSED=$((PASSED + 1))
705+
else
706+
echo " ✗ FAIL"
707+
echo " Expected: 2 (missing column is not filtered, only 'null' value is)"
708+
echo " Got: $result"
709+
FAILED=$((FAILED + 1))
710+
fi
711+
615712
# Summary
616713
echo ""
617714
echo "================================"

0 commit comments

Comments
 (0)