1+ // SPDX-License-Identifier: Apache-2.0
12package org .hiero .block .tools .days .subcommands ;
23
34import io .helidon .http .NotFoundException ;
4- import org .hiero .block .tools .days .download .DownloadConstants ;
5- import org .hiero .block .tools .utils .gcp .MainNetBucket ;
6- import picocli .CommandLine .Command ;
7- import picocli .CommandLine .Option ;
8-
95import java .io .File ;
106import java .nio .file .Files ;
117import java .nio .file .Path ;
1713import java .util .TreeSet ;
1814import java .util .concurrent .*;
1915import java .util .stream .Stream ;
16+ import org .hiero .block .tools .days .download .DownloadConstants ;
17+ import org .hiero .block .tools .utils .gcp .MainNetBucket ;
18+ import picocli .CommandLine .Command ;
19+ import picocli .CommandLine .Option ;
2020
2121/**
2222 * Randomly samples (date, hour) combinations and compares:
3131 *
3232 */
3333@ Command (
34- name = "gcp-ls" ,
35- description = "Randomly sample (day, hour) and compare GCP LS vs local downloadedDays signature counts"
36- )
34+ name = "gcp-ls" ,
35+ description = "Randomly sample (day, hour) and compare GCP LS vs local downloadedDays signature counts" )
3736public class GcpLsCommand implements Runnable {
3837
3938 @ Option (names = "--start-date" , required = true , description = "Start date (inclusive), yyyy-MM-dd" )
@@ -45,53 +44,51 @@ public class GcpLsCommand implements Runnable {
4544 @ Option (names = "--samples" , description = "Number of random (day, hour) samples" , defaultValue = "10" )
4645 private int samples ;
4746
48- @ Option (names = "--downloaded-days-dir" ,
49- description = "Root of downloadedDays directory (YYYY/MM/DD/record0.0.<node> structure)" )
47+ @ Option (
48+ names = "--downloaded-days-dir" ,
49+ description = "Root of downloadedDays directory (YYYY/MM/DD/record0.0.<node> structure)" )
5050 private Path downloadedDaysDir ;
5151
52- @ Option (names = "--node-count" ,
53- description = "Number of record nodes (record0.0.x), usually 31 for 0..30" ,
54- defaultValue = "31" )
52+ @ Option (
53+ names = "--node-count" ,
54+ description = "Number of record nodes (record0.0.x), usually 31 for 0..30" ,
55+ defaultValue = "31" )
5556 private int nodeCount ;
5657
5758 @ Option (
58- names = "--skip-local-compare" ,
59- description = "Skip comparing against local downloadedDays dir; only show GCP signature counts" )
59+ names = "--skip-local-compare" ,
60+ description = "Skip comparing against local downloadedDays dir; only show GCP signature counts" )
6061 private boolean skipLocalCompare = false ;
6162
62- @ Option (
63- names = "--show-ls" ,
64- description = "Show full list of GCP/local record files per node/hour" )
63+ @ Option (names = "--show-ls" , description = "Show full list of GCP/local record files per node/hour" )
6564 private boolean showLs = false ;
6665
6766 @ Option (
68- names = {"-c" , "--cache-dir" },
69- description = "Directory for GCS cache (default: data/gcp-cache)" )
67+ names = {"-c" , "--cache-dir" },
68+ description = "Directory for GCS cache (default: data/gcp-cache)" )
7069 private File cacheDir = new File ("data/gcp-cache" );
7170
7271 @ Option (
73- names = {"--cache" },
74- description = "Enable GCS caching (default: false)" )
72+ names = {"--cache" },
73+ description = "Enable GCS caching (default: false)" )
7574 private boolean cacheEnabled = false ;
7675
7776 @ Option (
78- names = {"--min-node" },
79- description = "Minimum node account ID (default: 3)" )
77+ names = {"--min-node" },
78+ description = "Minimum node account ID (default: 3)" )
8079 private int minNodeAccountId = 3 ;
8180
8281 @ Option (
83- names = {"--max-node" },
84- description = "Maximum node account ID (default: 37)" )
82+ names = {"--max-node" },
83+ description = "Maximum node account ID (default: 37)" )
8584 private int maxNodeAccountId = 37 ;
8685
87- @ Option (names = "--parallelism" ,
88- description = "Thread pool size for parallel node queries" ,
89- defaultValue = "8" )
86+ @ Option (names = "--parallelism" , description = "Thread pool size for parallel node queries" , defaultValue = "8" )
9087 private int parallelism ;
9188
9289 @ Option (
93- names = {"-p" , "--user-project" },
94- description = "GCP project to bill for requester-pays bucket access (default: from GCP_PROJECT_ID env var)" )
90+ names = {"-p" , "--user-project" },
91+ description = "GCP project to bill for requester-pays bucket access (default: from GCP_PROJECT_ID env var)" )
9592 private String userProject = DownloadConstants .GCP_PROJECT_ID ;
9693
9794 /**
@@ -104,7 +101,8 @@ public class GcpLsCommand implements Runnable {
104101 * Typical wiring: new CommandLine(new GcpLsCommand(mainNetBucket)).execute(args);
105102 */
106103 public GcpLsCommand () {
107- this .mainNetBucket = new MainNetBucket (true , cacheDir .toPath (), minNodeAccountId , maxNodeAccountId , userProject );
104+ this .mainNetBucket =
105+ new MainNetBucket (true , cacheDir .toPath (), minNodeAccountId , maxNodeAccountId , userProject );
108106 }
109107
110108 @ Override
@@ -116,10 +114,7 @@ public void run() {
116114
117115 validateInputs ();
118116
119- System .out .printf (
120- "Sampling %d (day, hour) slots between %s and %s%n" ,
121- samples , startDate , endDate
122- );
117+ System .out .printf ("Sampling %d (day, hour) slots between %s and %s%n" , samples , startDate , endDate );
123118
124119 long daysRange = ChronoUnit .DAYS .between (startDate , endDate ) + 1 ;
125120
@@ -130,20 +125,15 @@ public void run() {
130125 LocalDate sampleDate = startDate .plusDays (dayOffset );
131126 int hour = ThreadLocalRandom .current ().nextInt (24 );
132127
128+ System .out .printf ("%n=== Sample %d: date=%s hour=%02d ===%n" , i + 1 , sampleDate , hour );
133129 System .out .printf (
134- "%n=== Sample %d: date=%s hour=%02d ===%n" ,
135- i + 1 , sampleDate , hour
136- );
137- System .out .printf ("%-8s %-10s %-10s %-10s %-8s%n" ,
138- "Node" , "gcpFiles" , "localFiles" , "diff" , "status" );
130+ "%-8s %-10s %-10s %-10s %-8s%n" , "Node" , "gcpFiles" , "localFiles" , "diff" , "status" );
139131
140132 // submit node tasks in parallel
141133 List <Future <NodeResult >> futures = new ArrayList <>();
142134 for (int node = 0 ; node < nodeCount ; node ++) {
143135 final int nodeId = node ;
144- futures .add (executor .submit (() ->
145- checkNodeForHour (sampleDate , hour , nodeId )
146- ));
136+ futures .add (executor .submit (() -> checkNodeForHour (sampleDate , hour , nodeId )));
147137 }
148138
149139 // gather results in node order
@@ -158,12 +148,9 @@ public void run() {
158148 if (r .gcpSigCount < 0 ) {
159149 status = "ERROR" ;
160150 }
161- System .out .printf ("%-8s %-10d %-10d %-10d %-8s%n" ,
162- "0.0." + r .node ,
163- r .gcpSigCount ,
164- r .localSigCount ,
165- r .diff ,
166- status );
151+ System .out .printf (
152+ "%-8s %-10d %-10d %-10d %-8s%n" ,
153+ "0.0." + r .node , r .gcpSigCount , r .localSigCount , r .diff , status );
167154 }
168155
169156 // enriched differences view
@@ -239,20 +226,20 @@ private NodeResult checkNodeForHour(LocalDate date, int hour, int node) {
239226 final String hourToken = date + "T" + String .format ("%02d" , hour ) + "_" ;
240227
241228 gcpSigFiles = blobs .stream ()
242- .map (b -> b .path ())
243- // restrict to this hour based on filename in the path
244- .filter (path -> path .contains (hourToken ))
245- // only record files
246- .filter (this ::isRecordName )
247- // keep only this node's prefix, e.g. ".../record0.0.3/..."
248- .filter (path -> matchesNode (path , node ))
249- .map (this ::extractFileName )
250- .toList ();
229+ .map (b -> b .path ())
230+ // restrict to this hour based on filename in the path
231+ .filter (path -> path .contains (hourToken ))
232+ // only record files
233+ .filter (this ::isRecordName )
234+ // keep only this node's prefix, e.g. ".../record0.0.3/..."
235+ .filter (path -> matchesNode (path , node ))
236+ .map (this ::extractFileName )
237+ .toList ();
251238
252239 gcpSigCount = gcpSigFiles .size ();
253240 } catch (Exception e ) {
254- System .err .printf ("Error listing GCP for node 0.0.%d date=%s hour=%02d: %s%n" ,
255- node , date , hour , e .getMessage ());
241+ System .err .printf (
242+ "Error listing GCP for node 0.0.%d date=%s hour=%02d: %s%n" , node , date , hour , e .getMessage ());
256243 gcpSigCount = -1 ; // sentinel meaning "GCP listing failed"
257244 }
258245
@@ -285,14 +272,15 @@ private NodeResult checkNodeForHour(LocalDate date, int hour, int node) {
285272 }
286273 }
287274
288- return new NodeResult (node ,
289- gcpSigCount ,
290- localSigCount ,
291- diff ,
292- gcpOnly ,
293- localOnly ,
294- gcpSigFiles ,
295- skipLocalCompare ? List .of () : listLocalRecordFiles (date , hour , node ));
275+ return new NodeResult (
276+ node ,
277+ gcpSigCount ,
278+ localSigCount ,
279+ diff ,
280+ gcpOnly ,
281+ localOnly ,
282+ gcpSigFiles ,
283+ skipLocalCompare ? List .of () : listLocalRecordFiles (date , hour , node ));
296284 }
297285
298286 private boolean isRecordName (String name ) {
@@ -325,9 +313,9 @@ private long countLocalRecords(LocalDate date, int hour, int node) {
325313
326314 private List <String > listLocalRecordFiles (LocalDate date , int hour , int node ) {
327315 Path dayDir = downloadedDaysDir
328- .resolve (String .valueOf (date .getYear ()))
329- .resolve (String .format ("%02d" , date .getMonthValue ()))
330- .resolve (String .format ("%02d" , date .getDayOfMonth ()));
316+ .resolve (String .valueOf (date .getYear ()))
317+ .resolve (String .format ("%02d" , date .getMonthValue ()))
318+ .resolve (String .format ("%02d" , date .getDayOfMonth ()));
331319
332320 // per-node subdirectory
333321 dayDir = dayDir .resolve ("record0.0." + node );
@@ -339,13 +327,12 @@ private List<String> listLocalRecordFiles(LocalDate date, int hour, int node) {
339327 String filenamePrefix = date + "T" + String .format ("%02d" , hour ) + "_" ;
340328
341329 try (Stream <Path > stream = Files .list (dayDir )) {
342- return stream
343- .filter (Files ::isRegularFile )
344- .map (Path ::getFileName )
345- .map (Path ::toString )
346- .filter (name -> name .startsWith (filenamePrefix ))
347- .filter (this ::isRecordName )
348- .toList ();
330+ return stream .filter (Files ::isRegularFile )
331+ .map (Path ::getFileName )
332+ .map (Path ::toString )
333+ .filter (name -> name .startsWith (filenamePrefix ))
334+ .filter (this ::isRecordName )
335+ .toList ();
349336 } catch (Exception e ) {
350337 System .err .println ("Error listing local record files in " + dayDir + ": " + e .getMessage ());
351338 return List .of ();
@@ -362,14 +349,15 @@ private static final class NodeResult {
362349 final List <String > gcpFiles ;
363350 final List <String > localFiles ;
364351
365- private NodeResult (int node ,
366- long gcpSigCount ,
367- long localSigCount ,
368- long diff ,
369- List <String > gcpOnly ,
370- List <String > localOnly ,
371- List <String > gcpFiles ,
372- List <String > localFiles ) {
352+ private NodeResult (
353+ int node ,
354+ long gcpSigCount ,
355+ long localSigCount ,
356+ long diff ,
357+ List <String > gcpOnly ,
358+ List <String > localOnly ,
359+ List <String > gcpFiles ,
360+ List <String > localFiles ) {
373361 this .node = node ;
374362 this .gcpSigCount = gcpSigCount ;
375363 this .localSigCount = localSigCount ;
0 commit comments