Skip to content

Commit dd9b9d3

Browse files
committed
Add a new 'info' mode to read the header of compressed files
1 parent 68fce24 commit dd9b9d3

File tree

4 files changed

+146
-60
lines changed

4 files changed

+146
-60
lines changed

src/app/BlockDecompressor.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,24 @@ int BlockDecompressor::decompress(uint64& inputSize)
8787
transform(upperInputName.begin(), upperInputName.end(), upperInputName.begin(), ::toupper);
8888
bool isStdIn = upperInputName == "STDIN";
8989

90+
// In mode "info", we want to display the information in the stream header only.
91+
// We can reuse the existing code but we need to:
92+
// create an InfoPrinter with a dedicated INFO type
93+
// disable logging outside of this printer (=> _verbosity=0)
94+
// decompress no blocks (=> _outputName = NONE and --from=1 and --to=1)
95+
// disable threading for proper display (=> _jobs=1)
96+
bool isInfo = _ctx.getString("mode") == "y";
97+
int vl = _verbosity;
98+
99+
if (isInfo) {
100+
_verbosity = 0;
101+
_outputName = "NONE";
102+
_ctx.putString("outputName", _outputName);
103+
_ctx.putInt("from", 1);
104+
_ctx.putInt("to", 1);
105+
_ctx.putInt("jobs", 1);
106+
}
107+
90108
if (isStdIn == false) {
91109
vector<string> errors;
92110
bool isRecursive = (_inputName.length() < 2) ||
@@ -137,9 +155,10 @@ int BlockDecompressor::decompress(uint64& inputSize)
137155
ss.str(string());
138156
}
139157

140-
InfoPrinter listener(_verbosity, InfoPrinter::DECODING, cout);
158+
InfoPrinter::Type ipt = isInfo ? InfoPrinter::INFO : InfoPrinter::DECODING;
159+
InfoPrinter listener(vl, ipt, cout);
141160

142-
if (_verbosity > 2)
161+
if ((vl > 2) || ((isInfo == true) && (vl > 0)))
143162
addListener(listener);
144163

145164
int res = 0;

src/app/InfoPrinter.cpp

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ limitations under the License.
1818
#include <ios>
1919
#include <sstream>
2020
#include "InfoPrinter.hpp"
21-
#include "../util.hpp"
21+
#include "../util/strings.hpp"
2222

2323
using namespace kanzi;
2424
using namespace std;
@@ -145,29 +145,55 @@ void InfoPrinter::processEvent(const Event& evt)
145145
_map[hash(currentBlockId)] = nullptr;
146146
}
147147
else if ((evt.getType() == Event::AFTER_HEADER_DECODING) && (_level >= 3)) {
148-
stringstream ss(evt.toString());
149-
string s = ss.str();
148+
// Special CSV format
149+
stringstream ss;
150150
vector<string> tokens;
151+
string s = evt.toString();
151152
const int nbTokens = tokenizeCSV(s, tokens);
152-
ss.str(string());
153153

154-
if (nbTokens > 1)
155-
ss << "Bitstream version: " << tokens[1] << endl;
154+
if (_level >= 5) {
155+
// JSON text
156+
if (nbTokens > 1)
157+
ss << ", \"bsVersion\":" << tokens[1];
158+
159+
if (nbTokens > 2)
160+
ss << ", \"checksize\":" << tokens[2];
161+
162+
if (nbTokens > 3)
163+
ss << ", \"blocksize\":" << tokens[3];
164+
165+
if (nbTokens > 4)
166+
ss << ", \"entropy\":" << (tokens[4] == "" ? "none" : "\"" + tokens[4] + "\"");
156167

157-
if (nbTokens > 2)
158-
ss << "Block checksum: " << tokens[2] << (tokens[2] == "NONE" ? "" : " bits") << endl;
168+
if (nbTokens > 5)
169+
ss << ", \"transforms\":" << (tokens[5] == "" ? "none" : "\"" + tokens[5] + "\"");
159170

160-
if (nbTokens > 3)
161-
ss << "Block size: " << tokens[3] << " bytes" << endl;
171+
if (nbTokens > 6)
172+
ss << ", \"compressed\":" << (tokens[6] == "" ? "N/A" : tokens[6]);
162173

163-
if (nbTokens > 4)
164-
ss << "Using " << (tokens[4] == "" ? "no" : tokens[4]) << " entropy codec (stage 1)" << endl;
174+
if (nbTokens > 7)
175+
ss << ", \"original\":" << (tokens[7] == "" ? "N/A" : tokens[7]);
176+
}
177+
else {
178+
// Raw text
179+
if (nbTokens > 1)
180+
ss << "Bitstream version: " << tokens[1] << endl;
181+
182+
if (nbTokens > 2)
183+
ss << "Block checksum: " << (tokens[2] == "0" ? "NONE" : tokens[2] + " bits") << endl;
184+
185+
if (nbTokens > 3)
186+
ss << "Block size: " << tokens[3] << " bytes" << endl;
165187

166-
if (nbTokens > 5)
167-
ss << "Using " << (tokens[5] == "" ? "no" : tokens[5]) << " transform (stage 2)" << endl;
188+
if (nbTokens > 4)
189+
ss << "Using " << (tokens[4] == "" ? "no" : tokens[4]) << " entropy codec (stage 1)" << endl;
168190

169-
if (nbTokens > 7)
170-
ss << "Original size: " << tokens[7] << " byte(s)" << endl;;
191+
if (nbTokens > 5)
192+
ss << "Using " << (tokens[5] == "" ? "no" : tokens[5]) << " transform (stage 2)" << endl;
193+
194+
if ((nbTokens > 7) && (tokens[7] != ""))
195+
ss << "Original size: " << tokens[7] << " byte(s)" << endl;
196+
}
171197

172198
_os << ss.str() << endl;
173199
}
@@ -190,6 +216,7 @@ void InfoPrinter::processHeaderInfo(const Event& evt)
190216

191217
if (_headerInfo++ == 0) {
192218
// Display header
219+
ss << endl;
193220
ss << "|" << " File Name ";
194221
ss << "|" << "Ver";
195222
ss << "|" << "Check";
@@ -200,7 +227,7 @@ void InfoPrinter::processHeaderInfo(const Event& evt)
200227

201228
if (_level >= 4) {
202229
ss << "|" << " Entropy";
203-
ss << "|" << " Transforms ";
230+
ss << "|" << " Transforms ";
204231
}
205232

206233
ss << "|" << endl;
@@ -264,10 +291,10 @@ void InfoPrinter::processHeaderInfo(const Event& evt)
264291
if ((_level >= 4) && (nbTokens > 5)) {
265292
string t = tokens[5];
266293

267-
if (t.length() > 22)
268-
t = t.substr(0, 20) + "..";
294+
if (t.length() > 26)
295+
t = t.substr(0, 24) + "..";
269296

270-
ss << setw(22) << (t == "" ? "NONE" : t) << "|"; // transforms
297+
ss << setw(26) << (t == "" ? "NONE" : t) << "|"; // transforms
271298
}
272299

273300
_os << ss.str() << endl;

src/app/Kanzi.cpp

Lines changed: 77 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ limitations under the License.
3232
using namespace kanzi;
3333
using namespace std;
3434

35-
static const string CMD_LINE_ARGS[14] = {
36-
"-c", "-d", "-i", "-o", "-b", "-t", "-e", "-j", "-v", "-l", "-s", "-x", "-f", "-h"
35+
static const string CMD_LINE_ARGS[15] = {
36+
"-c", "-d", "-i", "-o", "-b", "-t", "-e", "-j", "-v", "-l", "-s", "-x", "-f", "-h", "-y"
3737
};
3838

3939
//static const int ARG_IDX_COMPRESS = 0;
@@ -53,7 +53,7 @@ static const int ARG_IDX_LEVEL = 9;
5353
static const string KANZI_VERSION = "2.4.0";
5454
static const string APP_HEADER = "Kanzi " + KANZI_VERSION + " (c) Frederic Langlet";
5555
static const string APP_SUB_HEADER = "Fast lossless data compressor.";
56-
static const string APP_USAGE = "Usage: kanzi [-c|-d] [flags and files in any order]";
56+
static const string APP_USAGE = "Usage: kanzi [-c|-d|-y] [flags and files in any order]";
5757

5858

5959
#ifdef CONCURRENCY_ENABLED
@@ -77,14 +77,16 @@ void printHelp(Printer& log, const string& mode, bool showHeader)
7777
log.println("Options\n", true);
7878
log.println(" -h, --help", true);
7979

80-
if ((mode.compare(0, 1, "c") != 0) && (mode.compare(0, 1, "d") != 0)) {
80+
if ((mode != "c") && (mode != "d") && (mode != "y")) {
8181
log.println(" Display this message.", true);
8282
log.println(" Use in conjunction with -c to print information for compression,", true);
8383
log.println(" or -d to print information for decompression.\n", true);
8484
log.println(" -c, --compress", true);
8585
log.println(" Compress mode\n", true);
8686
log.println(" -d, --decompress", true);
8787
log.println(" Decompress mode\n", true);
88+
log.println(" -y, --info", true);
89+
log.println(" Info mode: display information about compressed files\n", true);
8890
}
8991
else {
9092
log.println(" Display this message.\n", true);
@@ -101,23 +103,26 @@ void printHelp(Printer& log, const string& mode, bool showHeader)
101103
log.println(ss.str(), true);
102104
log.println(" If this option is not provided, kanzi reads data from stdin.\n", true);
103105
ss.str(string());
104-
log.println(" -o, --output=<outputName>", true);
105106

106-
if (mode.compare(0, 1, "c") == 0) {
107-
log.println(" Optional name of the output file or directory (defaults to", true);
108-
log.println(" <inputName.knz> if input is <inputName> or 'stdout' if input is 'stdin').", true);
109-
log.println(" or 'none' or 'stdout'.\n", true);
110-
}
111-
else if (mode.compare(0, 1, "d") == 0) {
112-
log.println(" Optional name of the output file or directory (defaults to", true);
113-
log.println(" <inputName> if input is <inputName.knz> or 'stdout' if input is 'stdin').", true);
114-
log.println(" or 'none' or 'stdout'.\n", true);
115-
}
116-
else {
117-
log.println(" Optional name of the output file or 'none' or 'stdout'.\n", true);
107+
if (mode != "y") {
108+
log.println(" -o, --output=<outputName>", true);
109+
110+
if (mode == "c") {
111+
log.println(" Optional name of the output file or directory (defaults to", true);
112+
log.println(" <inputName.knz> if input is <inputName> or 'stdout' if input is 'stdin').", true);
113+
log.println(" or 'none' or 'stdout'.\n", true);
114+
}
115+
else if (mode == "d") {
116+
log.println(" Optional name of the output file or directory (defaults to", true);
117+
log.println(" <inputName> if input is <inputName.knz> or 'stdout' if input is 'stdin').", true);
118+
log.println(" or 'none' or 'stdout'.\n", true);
119+
}
120+
else {
121+
log.println(" Optional name of the output file or 'none' or 'stdout'.\n", true);
122+
}
118123
}
119124

120-
if (mode.compare(0, 1, "c") == 0) {
125+
if (mode == "c") {
121126
log.println(" -b, --block=<size>", true);
122127
log.println(" Size of blocks (default 4|8|16|32 MiB based on level, max 1 GiB, min 1 KiB).", true);
123128
log.println(" 'auto' means that the compressor derives the best value", true);
@@ -162,24 +167,29 @@ void printHelp(Printer& log, const string& mode, bool showHeader)
162167
log.println(" 4=display block size and timings, 5=display extra information", true);
163168
log.println(" Verbosity is reduced to 1 when files are processed concurrently", true);
164169
log.println(" Verbosity is reduced to 0 when the output is 'stdout'\n", true);
165-
log.println(" -f, --force", true);
166-
log.println(" Overwrite the output file if it already exists\n", true);
167-
log.println(" --rm", true);
168170

169-
if (mode.compare(0, 1, "c") == 0) {
171+
if (mode != "y") {
172+
log.println(" -f, --force", true);
173+
log.println(" Overwrite the output file if it already exists\n", true);
174+
}
175+
176+
if (mode == "c") {
177+
log.println(" --rm", true);
170178
log.println(" Remove the input file after successful compression.", true);
179+
log.println(" If the input is a folder, all processed files under the folder are removed.\n", true);
171180
}
172-
else {
181+
else if (mode == "d") {
182+
log.println(" --rm", true);
173183
log.println(" Remove the input file after successful decompression.", true);
184+
log.println(" If the input is a folder, all processed files under the folder are removed.\n", true);
174185
}
175186

176-
log.println(" If the input is a folder, all processed files under the folder are removed.\n", true);
177187
log.println(" --no-link", true);
178188
log.println(" Skip links\n", true);
179189
log.println(" --no-dot-file", true);
180190
log.println(" Skip dot files\n", true);
181191

182-
if (mode.compare(0, 1, "d") == 0) {
192+
if (mode == "d") {
183193
log.println(" --from=blockId", true);
184194
log.println(" Decompress starting at the provided block (included).", true);
185195
log.println(" The first block ID is 1.\n", true);
@@ -191,7 +201,7 @@ void printHelp(Printer& log, const string& mode, bool showHeader)
191201
log.println(" kanzi --decompress --input=foo.knz --force --verbose=2 --jobs=2\n", true);
192202
}
193203

194-
if (mode.compare(0, 1, "c") == 0) {
204+
if (mode == "c") {
195205
log.println("", true);
196206
log.println("Transforms\n", true);
197207
log.println(" BWT: Burrows Wheeler Transform is a transform that reorders symbols", true);
@@ -399,8 +409,8 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
399409

400410
// Extract verbosity, output and mode first
401411
if ((arg == "-c") || (arg.compare(0, 10, "--compress") == 0)) {
402-
if (mode == "d") {
403-
cerr << "Both compression and decompression options were provided." << endl;
412+
if (mode != "") {
413+
cerr << "Only one mode can be provided (already got '" << mode << "'" << endl;
404414
return Error::ERR_INVALID_PARAM;
405415
}
406416

@@ -409,15 +419,25 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
409419
}
410420

411421
if ((arg == "-d") || (arg.compare(0, 12, "--decompress") == 0)) {
412-
if (mode == "c") {
413-
cerr << "Both compression and decompression options were provided." << endl;
422+
if (mode != "") {
423+
cerr << "Only one mode can be provided (already got '" << mode << "'" << endl;
414424
return Error::ERR_INVALID_PARAM;
415425
}
416426

417427
mode = "d";
418428
continue;
419429
}
420430

431+
if ((arg == "-y") || (arg.compare(0, 10, "--info") == 0)) {
432+
if (mode != "") {
433+
cerr << "Only one mode can be provided (already got '" << mode << "')" << endl;
434+
return Error::ERR_INVALID_PARAM;
435+
}
436+
437+
mode = "y";
438+
continue;
439+
}
440+
421441
if ((ctx == ARG_IDX_VERBOSE) || (arg.compare(0, 10, "--verbose=") == 0)) {
422442
if (verboseFlag == true) {
423443
WARNING_OPT_DUPLICATE("verbosity level", arg);
@@ -467,12 +487,17 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
467487
}
468488
}
469489
else {
470-
string str = outputName;
471-
transform(str.begin(), str.end(), str.begin(), ::toupper);
490+
if (mode == "y") {
491+
log.println("Warning: ignoring option [" + outputName + "]. Only applicable in compression or decompression mode.", verbose > 0);
492+
}
493+
else {
494+
string str = outputName;
495+
transform(str.begin(), str.end(), str.begin(), ::toupper);
472496

473-
if (str == "STDOUT") {
474-
verbose = 0;
475-
verboseFlag = true;
497+
if (str == "STDOUT") {
498+
verbose = 0;
499+
verboseFlag = true;
500+
}
476501
}
477502
}
478503

@@ -494,7 +519,8 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
494519
arg = arg.substr(k);
495520
}
496521

497-
if ((arg == "-c") || (arg == "-d") || (arg == "--compress") || (arg == "--decompress")) {
522+
if ((arg == "-c") || (arg == "-d") || (arg == "-y") || (arg == "--compress") || (arg == "--decompress") ||
523+
(arg == "--info")) {
498524
if (ctx != -1) {
499525
WARNING_OPT_NOVALUE(CMD_LINE_ARGS[ctx]);
500526
}
@@ -511,6 +537,11 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
511537
WARNING_OPT_DUPLICATE(arg, "true");
512538
}
513539

540+
if (mode == "y") {
541+
WARNING_OPT_COMP_ONLY(arg);
542+
continue;
543+
}
544+
514545
overwrite = 1;
515546
ctx = -1;
516547
continue;
@@ -559,6 +590,10 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
559590
WARNING_OPT_DUPLICATE(arg, "true");
560591
}
561592

593+
if (mode == "y") {
594+
WARNING_OPT_COMP_ONLY(arg);
595+
continue;
596+
}
562597
remove = 1;
563598
ctx = -1;
564599
continue;
@@ -630,6 +665,11 @@ int processCommandLine(int argc, const char* argv[], Context& map, Printer& log)
630665
WARNING_OPT_DUPLICATE(msg, arg);
631666
}
632667
else {
668+
if (mode == "y") {
669+
WARNING_OPT_COMP_ONLY(arg);
670+
continue;
671+
}
672+
633673
if ((arg.length() >= 2) && (arg[0] == '.') && (arg[1] == PATH_SEPARATOR)) {
634674
arg = (arg.length() == 2) ? arg.substr(0, 1) : arg.substr(2);
635675
}
@@ -1072,7 +1112,7 @@ int main(int argc, const char* argv[])
10721112
}
10731113
}
10741114

1075-
if (mode == "d") {
1115+
if ((mode == "d") || (mode == "y")) {
10761116
try {
10771117
BlockDecompressor bd(ctx);
10781118
uint64 read = 0;

src/io/CompressedInputStream.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ void CompressedInputStream::readHeader()
379379
string inputName = _ctx.getString("inputName", "");
380380
ss << inputName << ",";
381381
ss << bsVersion << ",";
382-
string ckSize = "NONE";
382+
string ckSize = "0";
383383

384384
if (_hasher32 != nullptr)
385385
ckSize = "32";

0 commit comments

Comments
 (0)