Skip to content

Commit 0fed7a1

Browse files
committed
Add 'latest' command to show latest timestamp per sensor
New command: sensor-data latest
- Shows the most recent timestamp for each sensor_id
- Human-readable table output by default
- Supports -of csv/json for machine-readable output
- -n flag to limit results (positive = first n, negative = last n)
- Supports --min-date, --max-date, --tail filters
- Uses shared DataReader for consistent file handling

Includes comprehensive test suite (test_latest.sh)
1 parent b88d29e commit 0fed7a1

File tree

8 files changed

+796
-3
lines changed

8 files changed

+796
-3
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ endif
1818

1919
# Source files
2020
SOURCES = src/sensor-data.cpp
21-
LIB_SOURCES = src/csv_parser.cpp src/json_parser.cpp src/error_detector.cpp src/file_utils.cpp src/sensor_data_transformer.cpp src/data_counter.cpp src/error_lister.cpp src/error_summarizer.cpp src/stats_analyser.cpp
21+
LIB_SOURCES = src/csv_parser.cpp src/json_parser.cpp src/error_detector.cpp src/file_utils.cpp src/sensor_data_transformer.cpp src/data_counter.cpp src/error_lister.cpp src/error_summarizer.cpp src/stats_analyser.cpp src/latest_finder.cpp
2222
TEST_SOURCES = tests/test_csv_parser.cpp tests/test_json_parser.cpp tests/test_error_detector.cpp tests/test_file_utils.cpp tests/test_date_utils.cpp
2323

2424
# Object files

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,43 @@ value:
256256
From: 20.0 -> 22.5
257257
```
258258

259+
### latest
260+
261+
Find the latest timestamp for each sensor.
262+
263+
```bash
264+
# Get latest timestamp per sensor
265+
sensor-data latest input.out
266+
267+
# Limit to first n sensors (alphabetically by sensor_id)
268+
sensor-data latest -n 5 input.out
269+
270+
# Limit to last n sensors
271+
sensor-data latest -n -5 input.out
272+
273+
# With date filtering
274+
sensor-data latest --min-date 2026-01-01 input.out
275+
```
276+
277+
**Options:**
278+
- `-n <num>` - Limit output rows (positive = first n, negative = last n)
279+
- `-of, --output-format <fmt>` - Output format: `human` (default), `csv`, or `json`
280+
- `-if, --input-format <format>` - Input format: `json` or `csv` (auto-detected)
281+
- `--min-date <date>` - Include only readings on or after this date
282+
- `--max-date <date>` - Include only readings on or before this date
283+
- `--tail <n>` - Only read the last n lines from each file
284+
- `-r, --recursive` - Recursively process subdirectories
285+
- `-e, --extension <ext>` - Filter files by extension (e.g., `.out`)
286+
- `-v` - Verbose output
287+
288+
**Output columns:** `sensor_id`, `timestamp` (Unix time), `iso_date` (local time, `YYYY-MM-DD HH:MM:SS`)
289+
290+
Example output (`-of csv`, header row omitted):
291+
```
292+
sensor001,1737315700,2025-01-19 19:41:40
293+
sensor002,1737315650,2025-01-19 19:40:50
294+
```
295+
259296
## Date Formats
260297

261298
The `--min-date` and `--max-date` options accept:

completions/sensor-data.bash

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ _sensor_data() {
1515
cword=$COMP_CWORD
1616
fi
1717

18-
local commands="transform count list-errors summarise-errors stats"
18+
local commands="transform count list-errors summarise-errors stats latest"
1919

2020
# Common options for all commands
2121
local common_opts="-r --recursive -v -V -e --extension -d --depth -if --input-format --min-date --max-date"
@@ -26,13 +26,14 @@ _sensor_data() {
2626
local list_errors_opts="-o --output"
2727
local summarise_errors_opts="-o --output"
2828
local stats_opts="-f --follow --tail -o --output --column --group-by"
29+
local latest_opts="-n -of --output-format --tail"
2930

3031
# Determine which command we're completing for
3132
local cmd=""
3233
local i
3334
for ((i=1; i < cword; i++)); do
3435
case "${words[i]}" in
35-
transform|count|list-errors|summarise-errors|stats)
36+
transform|count|list-errors|summarise-errors|stats|latest)
3637
cmd="${words[i]}"
3738
break
3839
;;
@@ -91,6 +92,11 @@ _sensor_data() {
9192
# Can't complete dates, leave empty
9293
return
9394
;;
95+
-n)
96+
# Suggest some common values for -n
97+
COMPREPLY=($(compgen -W "1 5 10 -1 -5 -10" -- "$cur"))
98+
return
99+
;;
94100
esac
95101

96102
# Complete based on command
@@ -111,6 +117,9 @@ _sensor_data() {
111117
stats)
112118
COMPREPLY=($(compgen -W "$common_opts $stats_opts" -- "$cur"))
113119
;;
120+
latest)
121+
COMPREPLY=($(compgen -W "$common_opts $latest_opts" -- "$cur"))
122+
;;
114123
esac
115124
else
116125
# Complete file/directory names

debian/sensor-data.1

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ Also shows quartiles (Q1, median, Q3, IQR), outlier detection (1.5*IQR method),
4444
delta statistics for consecutive value changes, and volatility.
4545
If timestamps are present, shows time range, duration, readings rate,
4646
typical sampling interval, and gap detection.
47+
.TP
48+
.B latest
49+
Show the latest timestamp for each sensor_id.
50+
Outputs sensor_id, unix_timestamp, and ISO date for each sensor's most recent reading.
4751
.SH GLOBAL OPTIONS
4852
.TP
4953
.BR \-h ", " \-\-help
@@ -250,6 +254,37 @@ Filter readings before this date.
250254
.TP
251255
.BI \-\-tail " n"
252256
Only read the last n lines from each file (useful for quick checks on large files).
257+
.SH LATEST OPTIONS
258+
.TP
259+
.BI \-n " num"
260+
Limit output rows (positive = first n sensors, negative = last n sensors).
261+
.TP
262+
.BR \-of ", " \-\-output\-format " " \fIfmt\fR
263+
Output format: human (default), csv, or json.
264+
.TP
265+
.BR \-if ", " \-\-input\-format " " \fIfmt\fR
266+
Input format for stdin: json or csv (default: json).
267+
.TP
268+
.BR \-r ", " \-\-recursive
269+
Recursively process subdirectories.
270+
.TP
271+
.B \-v
272+
Verbose output.
273+
.TP
274+
.BR \-e ", " \-\-extension " " \fIext\fR
275+
Filter files by extension (e.g., .out or out).
276+
.TP
277+
.BR \-d ", " \-\-depth " " \fIn\fR
278+
Maximum recursion depth (0 = current dir only).
279+
.TP
280+
.BI \-\-min\-date " date"
281+
Filter readings after this date.
282+
.TP
283+
.BI \-\-max\-date " date"
284+
Filter readings before this date.
285+
.TP
286+
.BI \-\-tail " n"
287+
Only read the last n lines from each file (useful for quick checks on large files).
253288
.SH EXAMPLES
254289
Transform a JSON sensor file to CSV:
255290
.PP

include/latest_finder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifndef LATEST_FINDER_H
2+
#define LATEST_FINDER_H
3+
4+
#include "command_base.h"
5+
#include <string>
6+
7+
// Implements the `latest` sub-command: reports the most recent timestamp
// seen for each sensor_id across the given input files.
class LatestFinder : public CommandBase {
8+
public:
9+
// Parses the command line. Handles -n and -of/--output-format itself and
// hands the remaining arguments to CommonArgParser. Prints usage and exits
// with status 0 on -h/--help; exits with status 1 on a parse failure or an
// unknown option.
LatestFinder(int argc, char* argv[]);
10+
// Runs the command and writes the result to stdout.
// Returns 0 on success, 1 when no input files were specified.
int main();
11+
// Prints the help text for this command to stderr.
static void usage();
12+
13+
private:
14+
int limitRows; // -n parameter: positive = first n, negative = last n, 0 = all (rows are ordered by sensor_id)
15+
std::string outputFormat; // "human" (default), "csv", or "json"; any other value is rendered as "human"
16+
};
17+
18+
#endif // LATEST_FINDER_H

src/latest_finder.cpp

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#include "latest_finder.h"
#include "common_arg_parser.h"
#include "data_reader.h"
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
11+
12+
// Per-sensor record of the newest reading encountered so far.
struct SensorLatest {
    std::string sensorId;  // sensor_id this record belongs to
    long long timestamp;   // newest timestamp seen; stays 0 until one is stored

    // A fresh record has an empty id and a zero timestamp, so any positive
    // timestamp compares as newer than the initial state.
    SensorLatest() : sensorId(), timestamp(0LL) {}
};
19+
20+
/**
 * Builds the `latest` command from its command-line arguments.
 *
 * The command-specific options (-n, -of/--output-format) are consumed here and
 * removed from the argument list; everything else is forwarded to
 * CommonArgParser. The constructor terminates the process instead of returning
 * in these cases:
 *   - -h/--help anywhere on the command line: prints usage, exits with 0
 *   - -n / -of given without a value, a non-integer or out-of-range -n value,
 *     or an output format other than human/csv/json: prints an error and
 *     usage, exits with 1
 *   - CommonArgParser rejects the remaining arguments or an unknown option is
 *     found: exits with 1
 *
 * @param argc number of entries in argv
 * @param argv argument vector; the pointers are forwarded (not copied) to the
 *             common parser
 */
LatestFinder::LatestFinder(int argc, char* argv[]) : limitRows(0), outputFormat("human") {
    // Help takes precedence over every other argument, valid or not.
    for (int i = 1; i < argc; ++i) {
        const std::string arg = argv[i];
        if (arg == "--help" || arg == "-h") {
            usage();
            std::exit(0);
        }
    }

    // Pull out the options only this command understands; whatever is left is
    // handed to the common parser untouched.
    std::vector<char*> filteredArgv;
    filteredArgv.reserve(argc > 0 ? static_cast<size_t>(argc) : 0);
    for (int i = 0; i < argc; ++i) {
        const std::string arg = argv[i];
        const bool isLimit = (arg == "-n");
        const bool isFormat = (arg == "-of" || arg == "--output-format");
        if (!isLimit && !isFormat) {
            filteredArgv.push_back(argv[i]);
            continue;
        }

        // Report a missing value here rather than letting the bare flag leak
        // into the common parser, where it would be misinterpreted.
        if (i + 1 >= argc) {
            std::cerr << "Error: Option '" << arg << "' requires a value\n";
            usage();
            std::exit(1);
        }
        const char* value = argv[++i];

        if (isLimit) {
            // strtol (unlike atoi) lets us reject garbage and overflow instead
            // of silently treating them as 0, which would mean "show all".
            errno = 0;
            char* end = nullptr;
            const long parsed = std::strtol(value, &end, 10);
            const bool notANumber = (end == value || *end != '\0');
            // INT_MIN is excluded so the value can always be negated safely.
            const bool outOfRange = (errno == ERANGE || parsed > INT_MAX || parsed <= INT_MIN);
            if (notANumber || outOfRange) {
                std::cerr << "Error: Invalid value '" << value << "' for -n (expected an integer)\n";
                usage();
                std::exit(1);
            }
            limitRows = static_cast<int>(parsed);
        } else {
            outputFormat = value;
            if (outputFormat != "human" && outputFormat != "csv" && outputFormat != "json") {
                std::cerr << "Error: Unknown output format '" << outputFormat
                          << "' (expected human, csv, or json)\n";
                usage();
                std::exit(1);
            }
        }
    }

    CommonArgParser parser;
    if (!parser.parse(static_cast<int>(filteredArgv.size()), filteredArgv.data())) {
        std::exit(1);
    }

    // Check for unknown options
    const std::string unknownOpt = CommonArgParser::checkUnknownOptions(
        static_cast<int>(filteredArgv.size()), filteredArgv.data());
    if (!unknownOpt.empty()) {
        std::cerr << "Error: Unknown option '" << unknownOpt << "'" << std::endl;
        usage();
        std::exit(1);
    }

    copyFromParser(parser);
}
63+
64+
// Prints the help text for the `latest` command to stderr.
// The column names on the last line match the CSV header and JSON keys that
// main() emits.
void LatestFinder::usage() {
    std::cerr << "Usage: sensor-data latest [OPTIONS] <file(s)/directory>\n"
              << " Outputs the latest timestamp for each sensor_id\n"
              << "\nOptions:\n"
              << " -n <num> Limit output rows (positive = first n, negative = last n)\n"
              << " -of, --output-format <fmt> Output format: human (default), csv, or json\n"
              << " --min-date <date> Only consider readings after this date\n"
              << " --max-date <date> Only consider readings before this date\n"
              << " -if, --input-format <fmt> Input format: json (default) or csv\n"
              << " --tail <n> Only read last n lines from each file\n"
              << " -r, --recursive Recursively process subdirectories\n"
              << " -e, --extension <ext> Filter files by extension (e.g., .out)\n"
              << " -d, --depth <n> Maximum recursion depth (0 = current dir only)\n"
              << " -v, --verbose Show verbose output\n"
              << " -h, --help Show this help message\n"
              << "\nOutput columns: sensor_id, timestamp, iso_date\n";
}
78+
79+
int LatestFinder::main() {
80+
if (inputFiles.empty()) {
81+
std::cerr << "Error: No input files specified\n";
82+
usage();
83+
return 1;
84+
}
85+
86+
printCommonVerboseInfo("latest", verbosity, recursive, extensionFilter, maxDepth, inputFiles.size());
87+
88+
// Map to store latest timestamp per sensor_id
89+
std::map<std::string, SensorLatest> latestBySensor;
90+
91+
// Create data reader with date filters
92+
DataReader reader(minDate, maxDate, verbosity, inputFormat, tailLines);
93+
94+
// Process all files
95+
for (const std::string& file : inputFiles) {
96+
if (verbosity > 0) {
97+
std::cerr << "Processing: " << file << "\n";
98+
}
99+
100+
reader.processFile(file, [&](const std::map<std::string, std::string>& reading, int, const std::string&) {
101+
// Get sensor_id
102+
auto sensorIt = reading.find("sensor_id");
103+
if (sensorIt == reading.end() || sensorIt->second.empty()) {
104+
return;
105+
}
106+
std::string sensorId = sensorIt->second;
107+
108+
// Get timestamp
109+
long long ts = DateUtils::getTimestamp(reading);
110+
if (ts <= 0) return;
111+
112+
// Update if this is the latest for this sensor
113+
auto& entry = latestBySensor[sensorId];
114+
if (ts > entry.timestamp) {
115+
entry.sensorId = sensorId;
116+
entry.timestamp = ts;
117+
}
118+
});
119+
}
120+
121+
// Convert to vector for sorting and limiting
122+
std::vector<SensorLatest> results;
123+
for (const auto& pair : latestBySensor) {
124+
results.push_back(pair.second);
125+
}
126+
127+
// Sort by sensor_id (natural order from map)
128+
std::sort(results.begin(), results.end(), [](const SensorLatest& a, const SensorLatest& b) {
129+
return a.sensorId < b.sensorId;
130+
});
131+
132+
// Apply -n limiting
133+
size_t startIdx = 0;
134+
size_t endIdx = results.size();
135+
136+
if (limitRows != 0) {
137+
if (limitRows > 0) {
138+
// First n rows
139+
endIdx = std::min(static_cast<size_t>(limitRows), results.size());
140+
} else {
141+
// Last n rows (negative)
142+
size_t count = static_cast<size_t>(-limitRows);
143+
if (count < results.size()) {
144+
startIdx = results.size() - count;
145+
}
146+
}
147+
}
148+
149+
// Output results
150+
if (outputFormat == "json") {
151+
std::cout << "[";
152+
bool first = true;
153+
for (size_t i = startIdx; i < endIdx; ++i) {
154+
const SensorLatest& entry = results[i];
155+
char isoDate[32];
156+
time_t t = static_cast<time_t>(entry.timestamp);
157+
std::strftime(isoDate, sizeof(isoDate), "%Y-%m-%d %H:%M:%S", std::localtime(&t));
158+
159+
if (!first) std::cout << ",";
160+
first = false;
161+
std::cout << "{\"sensor_id\":\"" << entry.sensorId
162+
<< "\",\"timestamp\":" << entry.timestamp
163+
<< ",\"iso_date\":\"" << isoDate << "\"}";
164+
}
165+
std::cout << "]\n";
166+
} else if (outputFormat == "csv") {
167+
std::cout << "sensor_id,timestamp,iso_date\n";
168+
for (size_t i = startIdx; i < endIdx; ++i) {
169+
const SensorLatest& entry = results[i];
170+
char isoDate[32];
171+
time_t t = static_cast<time_t>(entry.timestamp);
172+
std::strftime(isoDate, sizeof(isoDate), "%Y-%m-%d %H:%M:%S", std::localtime(&t));
173+
std::cout << entry.sensorId << "," << entry.timestamp << "," << isoDate << "\n";
174+
}
175+
} else {
176+
// Human-readable format (default)
177+
// Find max sensor_id width for alignment
178+
size_t maxIdWidth = 9; // "sensor_id"
179+
for (size_t i = startIdx; i < endIdx; ++i) {
180+
maxIdWidth = std::max(maxIdWidth, results[i].sensorId.length());
181+
}
182+
183+
std::cout << "Latest readings by sensor:\n\n";
184+
std::cout << std::left;
185+
std::cout.width(maxIdWidth + 2);
186+
std::cout << "Sensor ID";
187+
std::cout.width(14);
188+
std::cout << "Timestamp";
189+
std::cout << "Date/Time\n";
190+
std::cout << std::string(maxIdWidth + 2 + 14 + 19, '-') << "\n";
191+
192+
for (size_t i = startIdx; i < endIdx; ++i) {
193+
const SensorLatest& entry = results[i];
194+
char isoDate[32];
195+
time_t t = static_cast<time_t>(entry.timestamp);
196+
std::strftime(isoDate, sizeof(isoDate), "%Y-%m-%d %H:%M:%S", std::localtime(&t));
197+
198+
std::cout.width(maxIdWidth + 2);
199+
std::cout << entry.sensorId;
200+
std::cout.width(14);
201+
std::cout << entry.timestamp;
202+
std::cout << isoDate << "\n";
203+
}
204+
205+
std::cout << "\nTotal: " << (endIdx - startIdx) << " sensor(s)\n";
206+
}
207+
208+
return 0;
209+
}

0 commit comments

Comments
 (0)