Skip to content

Commit b88d29e

Browse files
committed
--tail
1 parent e9709f2 commit b88d29e

File tree

10 files changed

+224
-207
lines changed

10 files changed

+224
-207
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ sensor-data transform -r -e .out /path/to/logs/ output.csv
9292
- `--only-value <col:val>` - Only include rows where column equals value
9393
- `--allowed-values <column> <values|file>` - Only include rows where column is in allowed values
9494
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors`
95+
- `--tail <n>` - Only read the last `n` lines from each file (`n` must be a positive integer)
9596
- `-v` - Verbose output
9697
- `-V` - Very verbose output
9798

@@ -129,6 +130,7 @@ sensor-data count -r -e .out /path/to/logs/
129130
- `--exclude-value <col:val>` - Exclude rows where column equals value
130131
- `--allowed-values <column> <values|file>` - Only include rows where column is in allowed values
131132
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors`
133+
- `--tail <n>` - Only read the last `n` lines from each file (`n` must be a positive integer)
132134
- `-v` - Verbose output
133135
- `-V` - Very verbose output
134136

@@ -192,6 +194,7 @@ sensor-data stats --clean input.out
192194
- `--remove-empty-json` - Remove empty JSON input lines
193195
- `--remove-errors` - Remove error readings (DS18B20 value=85 or -127)
194196
- `--clean` - Shorthand for `--remove-empty-json --not-empty value --remove-errors`
197+
- `--tail <n>` - Only read the last `n` lines from each file (`n` must be a positive integer)
195198
- `-r, --recursive` - Recursively process subdirectories
196199
- `-v` - Verbose output
197200
- `-V` - Very verbose output

completions/sensor-data.bash

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ _sensor_data() {
2121
local common_opts="-r --recursive -v -V -e --extension -d --depth -if --input-format --min-date --max-date"
2222

2323
# Command-specific options
24-
local transform_opts="-o --output -of --output-format --use-prototype --not-empty --only-value --exclude-value --remove-errors --remove-whitespace --remove-empty-json"
25-
local count_opts="-f --follow --not-empty --only-value --exclude-value --remove-errors --remove-empty-json"
24+
local transform_opts="-o --output -of --output-format --tail --use-prototype --not-empty --only-value --exclude-value --remove-errors --remove-whitespace --remove-empty-json"
25+
local count_opts="-f --follow --tail --not-empty --only-value --exclude-value --remove-errors --remove-empty-json"
2626
local list_errors_opts="-o --output"
2727
local summarise_errors_opts="-o --output"
28-
local stats_opts="-f --follow -o --output --column --group-by"
28+
local stats_opts="-f --follow --tail -o --output --column --group-by"
2929

3030
# Determine which command we're completing for
3131
local cmd=""
@@ -66,6 +66,11 @@ _sensor_data() {
6666
COMPREPLY=($(compgen -W "0 1 2 3 5 10" -- "$cur"))
6767
return
6868
;;
69+
--tail)
70+
# Suggest some common tail values
71+
COMPREPLY=($(compgen -W "10 50 100 500 1000" -- "$cur"))
72+
return
73+
;;
6974
-if|--input-format)
7075
COMPREPLY=($(compgen -W "json csv" -- "$cur"))
7176
return

debian/sensor-data.1

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ Filter readings after this date (Unix timestamp, ISO date, or DD/MM/YYYY).
111111
.TP
112112
.BI \-\-max\-date " date"
113113
Filter readings before this date (Unix timestamp, ISO date, or DD/MM/YYYY).
114+
.TP
115+
.BI \-\-tail " n"
116+
Only read the last n lines from each file (useful for quick checks on large files).
114117
.SH COUNT OPTIONS
115118
.TP
116119
.BR \-if ", " \-\-input\-format " " \fIfmt\fR
@@ -162,6 +165,9 @@ Filter readings after this date.
162165
.TP
163166
.BI \-\-max\-date " date"
164167
Filter readings before this date.
168+
.TP
169+
.BI \-\-tail " n"
170+
Only read the last n lines from each file (useful for quick checks on large files).
165171
.SH LIST\-ERRORS AND SUMMARISE\-ERRORS OPTIONS
166172
.TP
167173
.BR \-if ", " \-\-input\-format " " \fIfmt\fR
@@ -241,6 +247,9 @@ Filter readings after this date.
241247
.TP
242248
.BI \-\-max\-date " date"
243249
Filter readings before this date.
250+
.TP
251+
.BI \-\-tail " n"
252+
Only read the last n lines from each file (useful for quick checks on large files).
244253
.SH EXAMPLES
245254
Transform a JSON sensor file to CSV:
246255
.PP

include/command_base.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class CommandBase {
4545
std::map<std::string, std::set<std::string>> excludeValueFilters;
4646
std::map<std::string, std::set<std::string>> allowedValues;
4747

48+
// Performance options
49+
int tailLines; // --tail <n>: only read last n lines from each file (0 = read all)
50+
4851
// Constructor with default values
4952
CommandBase()
5053
: hasInputFiles(false)
@@ -56,7 +59,8 @@ class CommandBase {
5659
, minDate(0)
5760
, maxDate(0)
5861
, removeErrors(false)
59-
, removeEmptyJson(false) {}
62+
, removeEmptyJson(false)
63+
, tailLines(0) {}
6064

6165
virtual ~CommandBase() = default;
6266

@@ -182,6 +186,7 @@ class CommandBase {
182186
notEmptyColumns = parser.getNotEmptyColumns();
183187
removeEmptyJson = parser.getRemoveEmptyJson();
184188
removeErrors = parser.getRemoveErrors();
189+
tailLines = parser.getTailLines();
185190
}
186191

187192
/**

include/common_arg_parser.h

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ class CommonArgParser {
2727
std::set<std::string> notEmptyColumns;
2828
bool removeEmptyJson;
2929
bool removeErrors;
30+
int tailLines; // --tail <n>: only read last n lines from each file
3031

3132
public:
3233
CommonArgParser()
3334
: recursive(false), extensionFilter(""), maxDepth(-1), verbosity(0),
34-
inputFormat("json"), minDate(0), maxDate(0), removeEmptyJson(false), removeErrors(false) {}
35+
inputFormat("json"), minDate(0), maxDate(0), removeEmptyJson(false), removeErrors(false),
36+
tailLines(0) {}
3537

3638
// Parse common arguments and collect files
3739
// Returns true if parsing should continue, false if help was shown or error occurred
@@ -82,6 +84,23 @@ class CommonArgParser {
8284
}
8385
} else if (arg == "-f" || arg == "--follow") {
8486
// Skip this flag - handled by DataCounter and StatsAnalyser
87+
} else if (arg == "--tail") {
88+
if (i + 1 < argc) {
89+
++i;
90+
try {
91+
tailLines = std::stoi(argv[i]);
92+
if (tailLines <= 0) {
93+
std::cerr << "Error: --tail requires a positive number" << std::endl;
94+
return false;
95+
}
96+
} catch (...) {
97+
std::cerr << "Error: invalid value for --tail: " << argv[i] << std::endl;
98+
return false;
99+
}
100+
} else {
101+
std::cerr << "Error: --tail requires a number argument" << std::endl;
102+
return false;
103+
}
85104
} else if (arg == "-o" || arg == "--output") {
86105
// Skip this flag and its argument - handled by SensorDataTransformer
87106
if (i + 1 < argc) {
@@ -259,6 +278,7 @@ class CommonArgParser {
259278
const std::set<std::string>& getNotEmptyColumns() const { return notEmptyColumns; }
260279
bool getRemoveEmptyJson() const { return removeEmptyJson; }
261280
bool getRemoveErrors() const { return removeErrors; }
281+
int getTailLines() const { return tailLines; }
262282

263283
/**
264284
* Check for unknown options in command line arguments.
@@ -274,7 +294,7 @@ class CommonArgParser {
274294
static const std::set<std::string> commonOptions = {
275295
"-r", "--recursive", "-v", "-V", "-if", "--input-format",
276296
"-e", "--extension", "-d", "--depth", "--min-date", "--max-date",
277-
"-h", "--help"
297+
"--tail", "-h", "--help"
278298
};
279299

280300
// Common filtering options
@@ -289,7 +309,7 @@ class CommonArgParser {
289309
"-if", "--input-format", "-e", "--extension", "-d", "--depth",
290310
"--min-date", "--max-date", "--not-empty", "--only-value",
291311
"--exclude-value", "--allowed-values", "-o", "--output", "-of", "--output-format",
292-
"-c", "--column"
312+
"-c", "--column", "--tail"
293313
};
294314

295315
for (int i = 1; i < argc; ++i) {

include/data_reader.h

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <iostream>
55
#include <fstream>
6+
#include <sstream>
67
#include <string>
78
#include <vector>
89
#include <map>
@@ -18,6 +19,7 @@ class DataReader {
1819
long long maxDate;
1920
int verbosity;
2021
std::string inputFormat; // "json" or "csv"
22+
int tailLines; // 0 = read all, >0 = read only last n lines
2123

2224
bool passesDateFilter(const std::map<std::string, std::string>& reading) const {
2325
if (minDate <= 0 && maxDate <= 0) return true;
@@ -26,8 +28,8 @@ class DataReader {
2628
}
2729

2830
public:
29-
DataReader(long long minDate = 0, long long maxDate = 0, int verbosity = 0, const std::string& format = "json")
30-
: minDate(minDate), maxDate(maxDate), verbosity(verbosity), inputFormat(format) {}
31+
DataReader(long long minDate = 0, long long maxDate = 0, int verbosity = 0, const std::string& format = "json", int tailLines = 0)
32+
: minDate(minDate), maxDate(maxDate), verbosity(verbosity), inputFormat(format), tailLines(tailLines) {}
3133

3234
// Internal helper to process a stream (CSV or JSON format)
3335
template<typename Callback>
@@ -94,16 +96,81 @@ class DataReader {
9496
void processFile(const std::string& filename, Callback callback) {
9597
if (verbosity >= 1) {
9698
std::cout << "Processing file: " << filename << std::endl;
99+
if (tailLines > 0) {
100+
std::cout << " (reading last " << tailLines << " lines only)" << std::endl;
101+
}
97102
}
98103

99-
std::ifstream infile(filename);
100-
if (!infile) {
101-
std::cerr << "Warning: Cannot open file: " << filename << std::endl;
102-
return;
103-
}
104+
bool isCSV = FileUtils::isCsvFile(filename);
104105

105-
processStream(infile, FileUtils::isCsvFile(filename), callback, filename);
106-
infile.close();
106+
if (tailLines > 0) {
107+
// Use tail mode
108+
if (isCSV) {
109+
// CSV: read header from file, then process tail lines
110+
std::ifstream headerFile(filename);
111+
if (!headerFile) {
112+
std::cerr << "Warning: Cannot open file: " << filename << std::endl;
113+
return;
114+
}
115+
std::string headerLine;
116+
std::vector<std::string> csvHeaders;
117+
if (std::getline(headerFile, headerLine) && !headerLine.empty()) {
118+
bool needMore = false;
119+
csvHeaders = CsvParser::parseCsvLine(headerFile, headerLine, needMore);
120+
}
121+
headerFile.close();
122+
123+
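// Read one extra line so that n data rows remain if the header line falls inside the tail and is skipped below.
// NOTE(review): when the header is not in the tail, all n+1 lines are processed as data rows.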
124+
auto lines = FileUtils::readTailLines(filename, tailLines + 1);
125+
int lineNum = 0;
126+
127+
for (const auto& line : lines) {
128+
lineNum++;
129+
if (line.empty()) continue;
130+
// Skip if this is the header line
131+
if (line == headerLine) continue;
132+
133+
auto fields = CsvParser::parseCsvLine(line);
134+
if (fields.empty()) continue;
135+
136+
std::map<std::string, std::string> reading;
137+
for (size_t i = 0; i < std::min(csvHeaders.size(), fields.size()); ++i) {
138+
reading[csvHeaders[i]] = fields[i];
139+
}
140+
141+
if (!passesDateFilter(reading)) continue;
142+
143+
callback(reading, lineNum, filename);
144+
}
145+
} else {
146+
// JSON: just process tail lines
147+
auto lines = FileUtils::readTailLines(filename, tailLines);
148+
int lineNum = 0;
149+
150+
for (const auto& line : lines) {
151+
lineNum++;
152+
if (line.empty()) continue;
153+
154+
auto readings = JsonParser::parseJsonLine(line);
155+
for (const auto& reading : readings) {
156+
if (reading.empty()) continue;
157+
158+
if (!passesDateFilter(reading)) continue;
159+
160+
callback(reading, lineNum, filename);
161+
}
162+
}
163+
}
164+
} else {
165+
std::ifstream infile(filename);
166+
if (!infile) {
167+
std::cerr << "Warning: Cannot open file: " << filename << std::endl;
168+
return;
169+
}
170+
171+
processStream(infile, isCSV, callback, filename);
172+
infile.close();
173+
}
107174
}
108175
};
109176

include/file_utils.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,17 @@ class FileUtils {
99
static bool isDirectory(const std::string& path);
1010
static bool isCsvFile(const std::string& filename);
1111
static bool matchesExtension(const std::string& filename, const std::string& extensionFilter);
12+
13+
/**
14+
* Read the last n lines from a file.
15+
* Returns the lines in order (first to last).
16+
* If the file has fewer than n lines, returns all lines.
17+
*
18+
* @param filename Path to the file
19+
* @param n Number of lines to read from the end
20+
* @return Vector of strings, each being a line (without newline)
21+
*/
22+
static std::vector<std::string> readTailLines(const std::string& filename, int n);
1223
};
1324

1425
#endif // FILE_UTILS_H

src/data_counter.cpp

Lines changed: 11 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "data_counter.h"
22
#include <fstream>
3+
#include <sstream>
34
#include <algorithm>
45
#include <chrono>
56
#include <thread>
@@ -8,6 +9,7 @@
89
#include "json_parser.h"
910
#include "file_utils.h"
1011
#include "file_collector.h"
12+
#include "data_reader.h"
1113

1214
// ===== Private methods =====
1315

@@ -16,60 +18,17 @@ long long DataCounter::countFromFile(const std::string& filename) {
1618
std::cerr << "Counting: " << filename << std::endl;
1719
}
1820

19-
std::ifstream infile(filename);
20-
if (!infile) {
21-
std::cerr << "Warning: Cannot open file: " << filename << std::endl;
22-
return 0;
23-
}
24-
2521
long long count = 0;
26-
std::string line;
27-
28-
if (FileUtils::isCsvFile(filename)) {
29-
// CSV format - first line is header
30-
std::vector<std::string> csvHeaders;
31-
if (std::getline(infile, line) && !line.empty()) {
32-
bool needMore = false;
33-
csvHeaders = CsvParser::parseCsvLine(infile, line, needMore);
34-
}
35-
36-
// Process data rows
37-
while (std::getline(infile, line)) {
38-
if (line.empty()) continue;
39-
40-
bool needMore = false;
41-
auto fields = CsvParser::parseCsvLine(infile, line, needMore);
42-
if (fields.empty()) continue;
43-
44-
std::map<std::string, std::string> reading;
45-
for (size_t i = 0; i < std::min(csvHeaders.size(), fields.size()); ++i) {
46-
reading[csvHeaders[i]] = fields[i];
47-
}
48-
49-
if (shouldIncludeReading(reading)) {
50-
count++;
51-
}
52-
}
53-
} else {
54-
// JSON format
55-
while (std::getline(infile, line)) {
56-
if (line.empty()) continue;
57-
58-
auto readings = JsonParser::parseJsonLine(line);
59-
60-
// Skip empty JSON arrays/objects if removeEmptyJson is set
61-
if (removeEmptyJson && areAllReadingsEmpty(readings)) continue;
62-
63-
for (const auto& reading : readings) {
64-
if (reading.empty()) continue;
65-
if (shouldIncludeReading(reading)) {
66-
count++;
67-
}
68-
}
22+
DataReader reader(minDate, maxDate, verbosity, FileUtils::isCsvFile(filename) ? "csv" : "json", tailLines);
23+
24+
reader.processFile(filename, [&](const std::map<std::string, std::string>& reading, int /*lineNum*/, const std::string& /*source*/) {
25+
// Skip empty readings (this check does not consult removeEmptyJson)
26+
if (reading.empty()) return;
27+
if (shouldIncludeReading(reading)) {
28+
count++;
6929
}
70-
}
30+
});
7131

72-
infile.close();
7332
return count;
7433
}
7534

@@ -377,6 +336,7 @@ void DataCounter::printCountUsage(const char* progName) {
377336
std::cerr << " --clean Shorthand for --remove-empty-json --not-empty value --remove-errors" << std::endl;
378337
std::cerr << " --min-date <date> Filter readings after this date" << std::endl;
379338
std::cerr << " --max-date <date> Filter readings before this date" << std::endl;
339+
std::cerr << " --tail <n> Only read the last n lines from each file" << std::endl;
380340
std::cerr << std::endl;
381341
std::cerr << "Examples:" << std::endl;
382342
std::cerr << " " << progName << " count sensor1.out" << std::endl;

0 commit comments

Comments
 (0)