@@ -64,37 +64,56 @@ impl Value {
6464}
6565
6666static USAGE : & str = r#"
67- Search for rows where the value in <column> matches <value> using binary search,
68- and flush all records after the target value.
69- The default behavior is similar to a lower_bound bisection, but you can exclude
70- records (equivalent to upper_bound) with the target value using the -E/--exclude
71- flag. It is assumed that the INPUT IS SORTED according to the specified column.
72- The ordering of the rows is assumed to be sorted according ascending lexicographic
73- order per default, but you can specify numeric ordering using the -N or --numeric
74- flag. You can also reverse the order using the -R/--reverse flag.
75- Use the -S/--search flag to only flush records matching the target value instead
76- of all records after it.
67+ Perform binary search on sorted CSV data.
68+
69+ This command is one order of magnitude faster than relying on `xan filter` or
70+ `xan search`, but it only works if the target file is sorted on the searched
71+ column, exists on disk and is not compressed (unless the compressed file remains
72+ seekable, typically if some `.gzi` index can be found beside it).
73+
74+ If CSV data is not properly sorted, result will be incorrect!
75+
76+ By default this command executes the so-called "lower bound" operation: it
77+ positions itself in the file where one would insert the searched value and then
78+ proceeds to flush the file from this point. This can be useful when piping
79+ into other commands to perform range queries, for instance, or enumerate values
80+ starting with some prefix.
81+
82+ Use the -S/--search flag if you only want to return rows matching your query
83+ exactly.
84+
85+ Finally, use the -R/--reverse flag if data is sorted in descending order and
86+ the -N/--numeric flag if data is sorted numerically rather than lexicographically.
87+
88+ Examples:
89+
90+ Searching for rows matching exactly "Anna" in a "name" column:
91+
92+ $ xan bisect -S name Anna people.csv
93+
94+ Finding all names starting with letter A:
95+
96+ $ xan bisect name A people.csv | xan slice -E '!name.startswith("A")'
7797
7898Usage:
7999 xan bisect [options] [--] <column> <value> <input>
80100 xan bisect --help
81101
82102bisect options:
83- -E, --exclude When set, the records with the target value will be
84- excluded from the output. By default, they are
85- included. Cannot be used with -S/--search.
86- TODO: not equivalent to upper_bound
87- -N, --numeric Compare according to the numerical value of cells
88- instead of the default lexicographic order.
89- -R, --reverse Reverse sort order, i.e. descending order.
90- -S, --search Perform a search on the target value instead of
91- flushing all records after the value (included).
92- Cannot be used with -E/--exclude nor -e/--end.
93- -e, --end <end-value> When set, the records after the target value will be
94- flushed until <end-value> is reached (included).
95- By default, all records after the target value are
96- flushed. Cannot be used with -S/--search.
97- -v, --verbose
103+ -S, --search Perform an exact search and only emit rows matching the
104+ query, instead of flushing all rows from found position.
105+ -R, --reverse Indicate that the file is sorted on <column> in descending
106+ order, instead of the default ascending order.
107+ -N, --numeric Indicate that searched values are numbers and that the order
108+ of the file is numerical instead of default lexicographic
109+ order.
110+ -E, --exclude When set, rows matching query exactly will be filtered out.
111+ It is equivalent to performing the "upper bound" operation
112+ but it does not come with the same performance guarantees
113+ in case there are many rows containing the searched values.
114+ Does not work with -S/--search.
115+ -v, --verbose Print logs detailing the search process to stderr, mostly
116+ for debugging purposes.
98117
99118Common options:
100119 -h, --help Display this message
@@ -114,7 +133,6 @@ struct Args {
114133 flag_numeric : bool ,
115134 flag_reverse : bool ,
116135 flag_search : bool ,
117- flag_end_value : Option < String > ,
118136 flag_output : Option < String > ,
119137 flag_no_headers : bool ,
120138 flag_delimiter : Option < Delimiter > ,
@@ -150,10 +168,6 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
150168 Err ( "The -E/--exclude and -S/--search flags cannot be used together" ) ?;
151169 }
152170
153- if args. flag_search && args. flag_end_value . is_some ( ) {
154- Err ( "The -S/--search and -e/--end flags cannot be used together" ) ?;
155- }
156-
157171 macro_rules! log {
158172 ( $( $arg: tt) * ) => {
159173 if args. flag_verbose {
0 commit comments