Skip to content

Commit f39689e

Browse files
committed
add --bytes and --output-path options
1 parent d97cb0f commit f39689e

File tree

4 files changed

+178
-81
lines changed

4 files changed

+178
-81
lines changed

README.md

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ $ export PATH=$PATH:`pwd`/target/appassembler/bin
3636

3737
```bash
3838
$ coop --help
39+
3940
USAGE
4041
coop [-hV] [COMMAND]
4142

@@ -46,6 +47,7 @@ E.g.
4647
$ coop ls s3://... | head -n 4
4748
$ coop ls s3://... | grep -m 10 -e '...'
4849
$ coop ls s3://... | cut -f 2 | sort -n -r
50+
$ coop ls s3://... -o result.zst
4951

5052

5153
OPTIONS
@@ -63,7 +65,7 @@ At present the only command is `ls`/`list`
6365
$ coop ls --help
6466

6567
USAGE
66-
coop ls [-hV] [--anonymous] [--human-readable] [--reverse-columns] [--show-header] [--summarize] [--verbose] [--region=<region>] <uris>...
68+
coop ls [-hV] [--anonymous] [--bytes] [--checksums] [--human-readable] [--reverse-columns] [--show-header] [--summarize] [--verbose] [-o=<outputPath>] [--region=<region>] <uris>...
6769

6870
List s3 paths recursively with content sizes.
6971

@@ -72,19 +74,23 @@ E.g.
7274
$ coop ls s3://... | head -n 4
7375
$ coop ls s3://... | grep -m 10 -e '...'
7476
$ coop ls s3://... | cut -f 2 | sort -n -r
77+
$ coop ls s3://... -o result.zst
7578

7679

7780
PARAMETERS
78-
<uris>... One or more s3 URIs.
81+
<uris>... One or more s3 URIs.
7982

8083
OPTIONS
81-
--region=<region> AWS region, default us-east-1.
82-
--anonymous Use anonymous AWS credentials.
83-
--human-readable Format content sizes in binary multi-byte units.
84-
--show-header Show column header row in output.
85-
--reverse-columns Reverse the order of output columns.
86-
--summarize Summarize counts and sizes per input URI.
87-
--verbose Show additional logging messages.
88-
-h, --help Show this help message and exit.
89-
-V, --version Print version information and exit.
84+
--region=<region> AWS region, default us-east-1.
85+
--anonymous Use anonymous AWS credentials.
86+
--bytes Format content sizes as bytes.
87+
--human-readable Format content sizes in binary multi-byte units.
88+
--show-header Show column header row in output.
89+
--reverse-columns Reverse the order of output columns.
90+
--checksums Show checksum values, if available.
91+
--summarize Summarize counts and sizes per input URI.
92+
-o, --output-path=<outputPath> Output path, optionally compressed (.gz,.bgz,.zst). Default stdout.
93+
--verbose Show additional logging messages.
94+
-h, --help Show this help message and exit.
95+
-V, --version Print version information and exit.
9096
```

pom.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
</issueManagement>
3232
<properties>
3333
<awssdk.version>2.33.11</awssdk.version>
34+
<dsh-compress.version>1.8</dsh-compress.version>
3435
<picocli.version>4.7.7</picocli.version>
3536
<slf4j.version>1.7.36</slf4j.version>
3637
</properties>
@@ -41,6 +42,11 @@
4142
<artifactId>picocli</artifactId>
4243
<version>${picocli.version}</version>
4344
</dependency>
45+
<dependency>
46+
<groupId>org.dishevelled</groupId>
47+
<artifactId>dsh-compress</artifactId>
48+
<version>${dsh-compress.version}</version>
49+
</dependency>
4450
<dependency>
4551
<groupId>org.slf4j</groupId>
4652
<artifactId>slf4j-api</artifactId>
@@ -76,6 +82,11 @@
7682
<artifactId>junit</artifactId>
7783
<scope>test</scope>
7884
</dependency>
85+
<dependency>
86+
<groupId>org.dishevelled</groupId>
87+
<artifactId>dsh-compress</artifactId>
88+
<scope>compile</scope>
89+
</dependency>
7990
<dependency>
8091
<groupId>org.slf4j</groupId>
8192
<artifactId>slf4j-simple</artifactId>

src/main/java/com/github/heuermh/cooper/Ls.java

Lines changed: 143 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515
*/
1616
package com.github.heuermh.cooper;
1717

18+
import static org.dishevelled.compress.Writers.writer;
19+
20+
import java.io.PrintWriter;
21+
22+
import java.nio.file.Path;
23+
1824
import java.util.Arrays;
1925
import java.util.HashMap;
2026
import java.util.List;
@@ -31,6 +37,8 @@
3137
import org.slf4j.LoggerFactory;
3238

3339
import picocli.CommandLine.Command;
40+
import picocli.CommandLine.Option;
41+
import picocli.CommandLine.Parameters;
3442

3543
import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
3644

@@ -53,36 +61,42 @@
5361
@Command(name = "ls", aliases={"list"})
5462
public final class Ls implements Callable<Integer> {
5563

56-
@picocli.CommandLine.Option(
64+
@Option(
5765
names = { "--region" },
5866
type = Region.class,
5967
converter = RegionConverter.class,
60-
defaultValue = "us-east-1"
68+
defaultValue = "us-east-1"
6169
)
6270
private Region region;
6371

64-
@picocli.CommandLine.Option(names = { "--anonymous" })
72+
@Option(names = { "--anonymous" })
6573
private boolean anonymous;
6674

67-
@picocli.CommandLine.Option(names = { "--human-readable" })
75+
@Option(names = { "--bytes" })
76+
private boolean bytes;
77+
78+
@Option(names = { "--human-readable" })
6879
private boolean humanReadable;
6980

70-
@picocli.CommandLine.Option(names = { "--show-header" })
81+
@Option(names = { "--show-header" })
7182
private boolean showHeader;
7283

73-
@picocli.CommandLine.Option(names = { "--reverse-columns" })
84+
@Option(names = { "--reverse-columns" })
7485
private boolean reverseColumns;
7586

76-
@picocli.CommandLine.Option(names = { "--checksums" })
87+
@Option(names = { "--checksums" })
7788
private boolean checksums;
7889

79-
@picocli.CommandLine.Option(names = { "--summarize" })
90+
@Option(names = { "--summarize" })
8091
private boolean summarize;
8192

82-
@picocli.CommandLine.Option(names = { "--verbose" })
93+
@Option(names = { "--output-path", "-o" })
94+
private Path outputPath;
95+
96+
@Option(names = { "--verbose" })
8397
private boolean verbose;
8498

85-
@picocli.CommandLine.Parameters(index = "0..*", arity = "1..*", descriptionKey = "uris")
99+
@Parameters(index = "0..*", arity = "1..*", descriptionKey = "uris")
86100
private List<String> uris;
87101

88102
/** Logger. */
@@ -106,87 +120,148 @@ public Integer call() throws Exception {
106120
}
107121
S3Client s3 = builder.build();
108122

109-
if (showHeader) {
110-
if (summarize) {
111-
System.out.println(reverseColumns ? "size\tcount\turi" : "uri\tcount\tsize");
112-
}
113-
else if (checksums) {
114-
System.out.println(reverseColumns ? "size\tchecksum_type\tchecksum_algorithms\te_tag\turi" : "uri\tchecksum_type\tchecksum_algorithms\te_tag\tsize");
115-
}
116-
else {
117-
System.out.println(reverseColumns ? "size\turi" : "uri\tsize");
118-
}
123+
// warn if --summarize and --checksums
124+
if (summarize && checksums) {
125+
logger.warn("--summarize does not show checksums, even if --checksums provided");
119126
}
120127

121-
Map<String, Integer> counts = new HashMap<String, Integer>();
122-
Map<String, Long> sizes = new HashMap<String, Long>();
123-
124-
for (String uri : uris) {
125-
Matcher m = S3_URI.matcher(uri);
126-
if (m.matches()) {
127-
String bucket = m.group(1);
128-
String prefix = m.group(2);
129-
130-
logger.info("valid uri={} bucket={} prefix={}", uri, bucket, prefix);
128+
try (PrintWriter writer = writer(outputPath)) {
131129

132-
ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder()
133-
.bucket(bucket);
134-
135-
if (prefix != null && !prefix.trim().isEmpty()) {
136-
requestBuilder = requestBuilder.prefix(prefix);
130+
// show header, if --show-header
131+
if (showHeader) {
132+
if (summarize) {
133+
if (bytes && humanReadable) {
134+
writer.println(reverseColumns ? "bytes\thuman_readable\tcount\turi" : "uri\tcount\tbytes\thuman_readable");
135+
}
136+
else {
137+
writer.println(reverseColumns ? "size\tcount\turi" : "uri\tcount\tsize");
138+
}
139+
}
140+
else if (checksums) {
141+
if (bytes && humanReadable) {
142+
writer.println(reverseColumns ? "bytes\thuman_readable\tchecksum_type\tchecksum_algorithms\te_tag\turi" : "uri\tchecksum_type\tchecksum_algorithms\te_tag\tbytes\thuman_readable");
143+
}
144+
else {
145+
writer.println(reverseColumns ? "size\tchecksum_type\tchecksum_algorithms\te_tag\turi" : "uri\tchecksum_type\tchecksum_algorithms\te_tag\tsize");
146+
}
137147
}
148+
else {
149+
if (bytes && humanReadable) {
150+
writer.println(reverseColumns ? "bytes\thuman_readable\turi" : "uri\tbytes\thuman_readable");
151+
}
152+
else {
153+
writer.println(reverseColumns ? "size\turi" : "uri\tsize");
154+
}
155+
}
156+
}
138157

139-
ListObjectsV2Request request = requestBuilder.build();
158+
Joiner joiner = Joiner.on("\t");
159+
Map<String, Integer> counts = new HashMap<String, Integer>();
160+
Map<String, Long> sizes = new HashMap<String, Long>();
140161

141-
logger.info("ListObjectsV2 request={}", request.toString());
162+
for (String uri : uris) {
163+
Matcher m = S3_URI.matcher(uri);
164+
if (m.matches()) {
165+
String bucket = m.group(1);
166+
String prefix = m.group(2);
142167

143-
ListObjectsV2Iterable responses = s3.listObjectsV2Paginator(request);
144-
for (ListObjectsV2Response response : responses) {
168+
logger.info("valid uri={} bucket={} prefix={}", uri, bucket, prefix);
145169

146-
logger.info("ListObjectsV2 response={}", response.toString());
170+
ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder().bucket(bucket);
147171

148-
for (S3Object content : response.contents()) {
172+
if (prefix != null && !prefix.trim().isEmpty()) {
173+
requestBuilder = requestBuilder.prefix(prefix);
174+
}
149175

150-
String s3Path = "s3://" + bucket + "/" + content.key();
176+
ListObjectsV2Request request = requestBuilder.build();
177+
logger.info("ListObjectsV2 request={}", request.toString());
151178

152-
if (s3Path.startsWith(uri)) {
179+
ListObjectsV2Iterable responses = s3.listObjectsV2Paginator(request);
180+
for (ListObjectsV2Response response : responses) {
153181

154-
String size = humanReadable ? FORMATTER.format(content.size()) : String.valueOf(content.size());
182+
logger.info("ListObjectsV2 response={}", response.toString());
155183

156-
if (summarize) {
157-
counts.put(uri, counts.containsKey(uri) ? counts.get(uri) + 1 : 1);
158-
sizes.put(uri, sizes.containsKey(uri) ? sizes.get(uri) + content.size() : content.size());
159-
}
160-
else if (checksums) {
161-
String checksumType = content.checksumTypeAsString();
162-
String checksumAlgorithms = Joiner.on(",").join(content.checksumAlgorithmAsStrings());
184+
for (S3Object content : response.contents()) {
185+
String s3Path = "s3://" + bucket + "/" + content.key();
163186

164-
// why is this value quoted?
165-
String eTag = content.eTag().replace("\"", "");
187+
if (s3Path.startsWith(uri)) {
188+
String byteSize = String.valueOf(content.size());
189+
String humanReadableSize = FORMATTER.format(content.size());
166190

167-
if (reverseColumns) {
168-
System.out.println(Joiner.on("\t").join(size, checksumType, checksumAlgorithms, eTag, s3Path));
191+
if (summarize) {
192+
counts.put(uri, counts.containsKey(uri) ? counts.get(uri) + 1 : 1);
193+
sizes.put(uri, sizes.containsKey(uri) ? sizes.get(uri) + content.size() : content.size());
194+
}
195+
else if (checksums) {
196+
String checksumType = content.checksumTypeAsString();
197+
String checksumAlgorithms = Joiner.on(",").join(content.checksumAlgorithmAsStrings());
198+
199+
// why is this value quoted?
200+
String eTag = content.eTag().replace("\"", "");
201+
202+
// format per --bytes, --human-readable, --reverse-columns
203+
if (bytes && humanReadable) {
204+
if (reverseColumns) {
205+
writer.println(joiner.join(byteSize, humanReadableSize, checksumType, checksumAlgorithms, eTag, s3Path));
206+
}
207+
else {
208+
writer.println(joiner.join(s3Path, checksumType, checksumAlgorithms, eTag, byteSize, humanReadableSize));
209+
}
210+
}
211+
else if (humanReadable) {
212+
if (reverseColumns) {
213+
writer.println(joiner.join(humanReadableSize, checksumType, checksumAlgorithms, eTag, s3Path));
214+
}
215+
else {
216+
writer.println(joiner.join(s3Path, checksumType, checksumAlgorithms, eTag, humanReadableSize));
217+
}
218+
}
219+
else {
220+
if (reverseColumns) {
221+
writer.println(joiner.join(byteSize, checksumType, checksumAlgorithms, eTag, s3Path));
222+
}
223+
else {
224+
writer.println(joiner.join(s3Path, checksumType, checksumAlgorithms, eTag, byteSize));
225+
}
226+
}
169227
}
170228
else {
171-
System.out.println(Joiner.on("\t").join(s3Path, checksumType, checksumAlgorithms, eTag, size));
229+
// format per --bytes, --human-readable, --reverse-columns
230+
if (bytes && humanReadable) {
231+
writer.println(reverseColumns ? joiner.join(byteSize, humanReadableSize, s3Path) : joiner.join(s3Path, byteSize, humanReadableSize));
232+
}
233+
else if (humanReadable) {
234+
writer.println(reverseColumns ? joiner.join(humanReadableSize, s3Path) : joiner.join(s3Path, humanReadableSize));
235+
}
236+
else {
237+
writer.println(reverseColumns ? joiner.join(byteSize, s3Path) : joiner.join(s3Path, byteSize));
238+
}
172239
}
173240
}
174-
else {
175-
System.out.println(reverseColumns ? size + "\t" + s3Path : s3Path + "\t" + size);
176-
}
177241
}
178242
}
179243
}
244+
else {
245+
logger.warn("uri {} not a valid s3 URI", uri);
246+
}
180247
}
181-
else {
182-
logger.warn("uri {} not a valid s3 URI", uri);
183-
}
184-
}
185-
if (summarize) {
186-
for (String uri : counts.keySet()) {
187-
Integer count = counts.get(uri);
188-
String size = humanReadable ? FORMATTER.format(sizes.get(uri)) : String.valueOf(sizes.get(uri));
189-
System.out.println(reverseColumns ? size + "\t" + count + "\t" + uri : uri + "\t" + count + "\t" + size);
248+
if (summarize) {
249+
for (String uri : counts.keySet()) {
250+
Integer count = counts.get(uri);
251+
String byteSize = String.valueOf(sizes.get(uri));
252+
String humanReadableSize = FORMATTER.format(sizes.get(uri));
253+
254+
// format per --bytes, --human-readable, --reverse-columns
255+
if (bytes && humanReadable) {
256+
writer.println(reverseColumns ? joiner.join(byteSize, humanReadableSize, count, uri) : joiner.join(uri, count, byteSize, humanReadableSize));
257+
}
258+
else if (humanReadable) {
259+
writer.println(reverseColumns ? joiner.join(humanReadableSize, count, uri) : joiner.join(uri, count, humanReadableSize));
260+
}
261+
else {
262+
writer.println(reverseColumns ? joiner.join(byteSize, count, uri) : joiner.join(uri, count, byteSize));
263+
}
264+
}
190265
}
191266
}
192267

0 commit comments

Comments
 (0)