Skip to content

Commit 16eb24d

Browse files
committed
count --by-column
1 parent eb596c1 commit 16eb24d

File tree

5 files changed

+202
-9
lines changed

5 files changed

+202
-9
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,19 @@ cat sensors.out | sensor-data count
111111
sensor-data count --remove-errors input.out
112112
sensor-data count --only-value type:temperature input.out
113113

114+
# Count by column value
115+
sensor-data count --by-column sensor_id input.out
116+
sensor-data count -b sensor_id -of csv input.out
117+
sensor-data count -b sensor_id -of json input.out
118+
114119
# Count recursively
115120
sensor-data count -r -e .out /path/to/logs/
116121
```
117122

118123
**Options:**
119124
- `-if, --input-format <format>` - Input format: `json` or `csv` (auto-detected)
125+
- `-of, --output-format <format>` - Output format: `human` (default), `csv`, or `json`; only applies with `--by-column`
126+
- `-b, --by-column <column>` - Show counts per value in the specified column
120127
- `-f, --follow` - Follow mode: continuously read stdin and update count
121128
- `-r, --recursive` - Recursively process subdirectories
122129
- `-e, --extension <ext>` - Filter files by extension (e.g., `.out`)

debian/sensor-data.1

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ Only read the last n lines from each file (useful for quick checks on large file
123123
.BR \-if ", " \-\-input\-format " " \fIfmt\fR
124124
Input format for stdin: json or csv (default: json).
125125
.TP
126+
.BR \-of ", " \-\-output\-format " " \fIfmt\fR
127+
Output format: human (default), csv, or json. Only applies when using \-\-by\-column.
128+
.TP
129+
.BR \-b ", " \-\-by\-column " " \fIcolumn\fR
130+
Show counts per value in the specified column instead of total count.
131+
.TP
126132
.BR \-f ", " \-\-follow
127133
Follow mode: continuously read input and update count (stdin or single file).
128134
.TP

include/data_counter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <string>
55
#include <vector>
6+
#include <map>
67

78
#include "command_base.h"
89

@@ -16,10 +17,14 @@
1617
* - Error reading removal
1718
* - Recursive directory processing
1819
* - Follow mode for files and stdin (like tail -f)
20+
* - Count by column value (--by-column)
1921
*/
2022
class DataCounter : public CommandBase {
2123
private:
2224
bool followMode; // --follow flag for continuous monitoring
25+
std::string byColumn; // --by-column for counts per value
26+
std::string outputFormat; // --output-format: human, csv, json
27+
std::map<std::string, long long> valueCounts; // counts per column value
2328

2429
/**
2530
* Count readings from a single file

src/data_counter.cpp

Lines changed: 90 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ long long DataCounter::countFromFile(const std::string& filename) {
2626
if (reading.empty()) return;
2727
if (shouldIncludeReading(reading)) {
2828
count++;
29+
if (!byColumn.empty()) {
30+
auto it = reading.find(byColumn);
31+
std::string value = (it != reading.end()) ? it->second : "(missing)";
32+
valueCounts[value]++;
33+
}
2934
}
3035
});
3136

@@ -60,6 +65,11 @@ long long DataCounter::countFromStdin() {
6065

6166
if (shouldIncludeReading(reading)) {
6267
count++;
68+
if (!byColumn.empty()) {
69+
auto it = reading.find(byColumn);
70+
std::string value = (it != reading.end()) ? it->second : "(missing)";
71+
valueCounts[value]++;
72+
}
6373
}
6474
}
6575
} else {
@@ -75,6 +85,11 @@ long long DataCounter::countFromStdin() {
7585
if (reading.empty()) continue;
7686
if (shouldIncludeReading(reading)) {
7787
count++;
88+
if (!byColumn.empty()) {
89+
auto it = reading.find(byColumn);
90+
std::string value = (it != reading.end()) ? it->second : "(missing)";
91+
valueCounts[value]++;
92+
}
7893
}
7994
}
8095
}
@@ -242,7 +257,7 @@ void DataCounter::countFromFileFollow(const std::string& filename) {
242257

243258
// ===== Constructor =====
244259

245-
DataCounter::DataCounter(int argc, char* argv[]) : followMode(false) {
260+
DataCounter::DataCounter(int argc, char* argv[]) : followMode(false), byColumn(""), outputFormat("human") {
246261
// Check for help flag first
247262
for (int i = 1; i < argc; ++i) {
248263
std::string arg = argv[i];
@@ -252,25 +267,40 @@ DataCounter::DataCounter(int argc, char* argv[]) : followMode(false) {
252267
}
253268
}
254269

255-
// Parse --follow flag
256-
for (int i = 1; i < argc; ++i) {
270+
// Parse count-specific arguments and build filtered argv for CommonArgParser
271+
std::vector<char*> filteredArgv;
272+
for (int i = 0; i < argc; ++i) {
257273
std::string arg = argv[i];
258274
if (arg == "--follow" || arg == "-f") {
259275
followMode = true;
276+
continue;
277+
} else if ((arg == "--by-column" || arg == "-b") && i + 1 < argc) {
278+
byColumn = argv[i + 1];
279+
i++; // Skip the value
280+
continue;
281+
} else if ((arg == "--output-format" || arg == "-of") && i + 1 < argc) {
282+
outputFormat = argv[i + 1];
283+
if (outputFormat != "human" && outputFormat != "csv" && outputFormat != "json") {
284+
std::cerr << "Error: --output-format must be 'human', 'csv', or 'json'" << std::endl;
285+
exit(1);
286+
}
287+
i++; // Skip the value
288+
continue;
260289
}
290+
filteredArgv.push_back(argv[i]);
261291
}
262292

263-
// Parse common flags and collect files
293+
// Parse common flags and collect files using filtered argv
264294
CommonArgParser parser;
265-
if (!parser.parse(argc, argv)) {
295+
if (!parser.parse(static_cast<int>(filteredArgv.size()), filteredArgv.data())) {
266296
exit(1);
267297
}
268298

269299
copyFromParser(parser);
270300

271-
// Check for unknown options (count-specific: -f/--follow)
272-
std::string unknownOpt = CommonArgParser::checkUnknownOptions(argc, argv,
273-
{"-f", "--follow"});
301+
// Check for unknown options using filtered argv
302+
std::string unknownOpt = CommonArgParser::checkUnknownOptions(
303+
static_cast<int>(filteredArgv.size()), filteredArgv.data());
274304
if (!unknownOpt.empty()) {
275305
std::cerr << "Error: Unknown option '" << unknownOpt << "'" << std::endl;
276306
printCountUsage(argv[0]);
@@ -309,7 +339,55 @@ void DataCounter::count() {
309339
}
310340
}
311341

312-
std::cout << totalCount << std::endl;
342+
if (!byColumn.empty()) {
343+
// Convert to vector and sort by count descending
344+
std::vector<std::pair<std::string, long long>> results(valueCounts.begin(), valueCounts.end());
345+
std::sort(results.begin(), results.end(),
346+
[](const std::pair<std::string, long long>& a, const std::pair<std::string, long long>& b) {
347+
return a.second > b.second; // Sort by count descending
348+
});
349+
350+
if (outputFormat == "json") {
351+
std::cout << "[";
352+
bool first = true;
353+
for (const auto& pair : results) {
354+
if (!first) std::cout << ",";
355+
first = false;
356+
std::cout << "{\"" << byColumn << "\":\"" << pair.first
357+
<< "\",\"count\":" << pair.second << "}";
358+
}
359+
std::cout << "]\n";
360+
} else if (outputFormat == "csv") {
361+
std::cout << byColumn << ",count\n";
362+
for (const auto& pair : results) {
363+
std::cout << pair.first << "," << pair.second << "\n";
364+
}
365+
} else {
366+
// Human-readable format (default)
367+
// Find max value width for alignment
368+
size_t maxValueWidth = byColumn.length();
369+
for (const auto& pair : results) {
370+
maxValueWidth = std::max(maxValueWidth, pair.first.length());
371+
}
372+
373+
std::cout << "Counts by " << byColumn << ":\n\n";
374+
std::cout << std::left;
375+
std::cout.width(maxValueWidth + 2);
376+
std::cout << byColumn;
377+
std::cout << "Count\n";
378+
std::cout << std::string(maxValueWidth + 2 + 10, '-') << "\n";
379+
380+
for (const auto& pair : results) {
381+
std::cout.width(maxValueWidth + 2);
382+
std::cout << pair.first;
383+
std::cout << pair.second << "\n";
384+
}
385+
386+
std::cout << "\nTotal: " << totalCount << " reading(s)\n";
387+
}
388+
} else {
389+
std::cout << totalCount << std::endl;
390+
}
313391
}
314392

315393
// ===== Usage printing =====
@@ -322,7 +400,9 @@ void DataCounter::printCountUsage(const char* progName) {
322400
std::cerr << std::endl;
323401
std::cerr << "Options:" << std::endl;
324402
std::cerr << " -if, --input-format <fmt> Input format for stdin: json or csv (default: json)" << std::endl;
403+
std::cerr << " -of, --output-format <fmt> Output format: human (default), csv, or json" << std::endl;
325404
std::cerr << " -f, --follow Follow mode: continuously monitor file/stdin for new data" << std::endl;
405+
std::cerr << " -b, --by-column <col> Show counts per value in the specified column" << std::endl;
326406
std::cerr << " -r, --recursive Recursively process subdirectories" << std::endl;
327407
std::cerr << " -v Verbose output (show progress)" << std::endl;
328408
std::cerr << " -V Very verbose output (show detailed progress)" << std::endl;
@@ -345,6 +425,7 @@ void DataCounter::printCountUsage(const char* progName) {
345425
std::cerr << " " << progName << " count --remove-errors sensor1.out" << std::endl;
346426
std::cerr << " " << progName << " count --only-value type:temperature sensor1.out" << std::endl;
347427
std::cerr << " " << progName << " count --clean sensor.out # exclude empty values" << std::endl;
428+
std::cerr << " " << progName << " count --by-column sensor sensor1.out # count per sensor" << std::endl;
348429
std::cerr << " " << progName << " count --follow sensor.out" << std::endl;
349430
std::cerr << " tail -f sensor.out | " << progName << " count --follow" << std::endl;
350431
}

tests/test_count.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,100 @@ else
518518
FAILED=$((FAILED + 1))
519519
fi
520520

521+
# Test: --by-column with human output (default)
522+
echo ""
523+
echo "Test: --by-column with human output"
524+
result=$(cat <<'EOF' | ./sensor-data count --by-column sensor
525+
[{"sensor": "ds18b20", "value": "22.5"}]
526+
[{"sensor": "dht22", "value": "45"}]
527+
[{"sensor": "ds18b20", "value": "23.0"}]
528+
EOF
529+
)
530+
if echo "$result" | grep -q "Counts by sensor" && echo "$result" | grep -q "ds18b20" && echo "$result" | grep -q "dht22" && echo "$result" | grep -q "Total: 3"; then
531+
echo " ✓ PASS"
532+
PASSED=$((PASSED + 1))
533+
else
534+
echo " ✗ FAIL"
535+
echo " Expected: Human-readable counts by sensor"
536+
echo " Got: $result"
537+
FAILED=$((FAILED + 1))
538+
fi
539+
540+
# Test: --by-column with csv output
541+
echo ""
542+
echo "Test: --by-column with csv output"
543+
result=$(cat <<'EOF' | ./sensor-data count -b sensor -of csv
544+
[{"sensor": "ds18b20", "value": "22.5"}]
545+
[{"sensor": "dht22", "value": "45"}]
546+
[{"sensor": "ds18b20", "value": "23.0"}]
547+
EOF
548+
)
549+
if echo "$result" | head -1 | grep -q "sensor,count" && echo "$result" | grep -q "ds18b20,2" && echo "$result" | grep -q "dht22,1"; then
550+
echo " ✓ PASS"
551+
PASSED=$((PASSED + 1))
552+
else
553+
echo " ✗ FAIL"
554+
echo " Expected: CSV with sensor,count header and data rows"
555+
echo " Got: $result"
556+
FAILED=$((FAILED + 1))
557+
fi
558+
559+
# Test: --by-column with json output
560+
echo ""
561+
echo "Test: --by-column with json output"
562+
result=$(cat <<'EOF' | ./sensor-data count -b sensor -of json
563+
[{"sensor": "ds18b20", "value": "22.5"}]
564+
[{"sensor": "dht22", "value": "45"}]
565+
[{"sensor": "ds18b20", "value": "23.0"}]
566+
EOF
567+
)
568+
# Sorted by count descending: ds18b20 (2) before dht22 (1)
569+
if echo "$result" | grep -q '\[{"sensor":"ds18b20","count":2},{"sensor":"dht22","count":1}\]'; then
570+
echo " ✓ PASS"
571+
PASSED=$((PASSED + 1))
572+
else
573+
echo " ✗ FAIL"
574+
echo " Expected: JSON array with sensor/count objects (sorted by count descending)"
575+
echo " Got: $result"
576+
FAILED=$((FAILED + 1))
577+
fi
578+
579+
# Test: --by-column with filters
580+
echo ""
581+
echo "Test: --by-column with --remove-errors filter"
582+
result=$(cat <<'EOF' | ./sensor-data count -b sensor --remove-errors -of csv
583+
[{"sensor": "ds18b20", "value": "22.5"}]
584+
[{"sensor": "ds18b20", "value": "85"}]
585+
[{"sensor": "dht22", "value": "45"}]
586+
EOF
587+
)
588+
if echo "$result" | grep -q "ds18b20,1" && echo "$result" | grep -q "dht22,1"; then
589+
echo " ✓ PASS"
590+
PASSED=$((PASSED + 1))
591+
else
592+
echo " ✗ FAIL"
593+
echo " Expected: ds18b20,1 and dht22,1 (error reading excluded)"
594+
echo " Got: $result"
595+
FAILED=$((FAILED + 1))
596+
fi
597+
598+
# Test: --by-column with non-existent column (such readings are grouped under the "(missing)" value)
599+
echo ""
600+
echo "Test: --by-column with non-existent column returns empty values"
601+
result=$(cat <<'EOF' | ./sensor-data count -b missing_column -of csv
602+
[{"sensor": "ds18b20", "value": "22.5"}]
603+
EOF
604+
)
605+
if echo "$result" | grep -q "missing_column,count" && echo "$result" | grep -q ",1"; then
606+
echo " ✓ PASS"
607+
PASSED=$((PASSED + 1))
608+
else
609+
echo " ✗ FAIL"
610+
echo " Expected: Count with empty column value"
611+
echo " Got: $result"
612+
FAILED=$((FAILED + 1))
613+
fi
614+
521615
# Summary
522616
echo ""
523617
echo "================================"

0 commit comments

Comments
 (0)