Skip to content

Commit 9af394e

Browse files
scan: fix filter erroneous early exit
The issue described below is related to the read operations which allows to scan: `crud.select`, `crud.pairs`, `crud.count`, `readview:select` and `readview:pairs`. The erroneous behavior reported by [1] and #418 is as follows: - result changes when reordering operation conditions; - when `>=` condition operation is changed to `=`, there are more rows in the result. The reason is as follows. Scanning read operates with two entities: an iterator and a filter. The iterator includes an index, a starting value and iterator type (EQ, GT, etc.). The iterator is built from conditions, if possible, otherwise primary index is used. Remaining conditions form the filter, so the actual result satisfies all operation conditions. The filter supports early exit. Let's consider the following example. For `crud.select(space, {{'>=', 'id', 1}, {'<=', 'id', 10}})`, where `id` is an index (or an indexed field), the iterator uses index `id`, starts from key = `1` and goes by GE rules, covering [1, +inf) ordered keys. On the other hand, when iterator reaches the tuple with `id` = `11`, all tuples after this one will never satisfy the second condition, because our iterator yields tuples sorted by `id` (due to underlying index). So filter tells than there is no reason to scan anymore, and we finish the scanning procedure. Before this patch, the function behind early exit decision had worked as follows: "if the condition is an index, we go in forward (reverse) order and `<=` or `<` (`>=` or `>`) condition is violated, there is no reason to scan anymore". But the valid approach is "if the condition is SCANNING index...". Before this patch, filter had assumed that if the condition for index is specified, tuples are ordered, but it works only if iterator uses the same index as in the condition. This patch fixes the issue. The erroneous behavior may happen in the following case: - there are multiple conditions, - there are at least two different index operands, - non-scanning index condition uses `<=`, `<`, `>=` or `>` operation. 1. https://jira.vk.team/browse/TNT-941 Closes #418
1 parent 495ddee commit 9af394e

File tree

15 files changed

+304
-26
lines changed

15 files changed

+304
-26
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1010
### Fixed
1111
* Compatibility with vshard configuration if UUIDs are omitted (#407).
1212
* Compatibility with automatic master discovery in vshard (#409).
13+
* Secondary conditions for index operands with operations `>=`, `<=`, `>`, `<`
14+
no longer cause missing part of the actual result for scan operations
15+
(`crud.select`, `crud.pairs`, `crud.count`, `readview:select`,
16+
`readview:pairs`) (#418).
1317

1418
## [1.4.2] - 25-12-23
1519

crud/compare/filters.lua

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,8 @@ local filters = {}
1717
- opposite condition `'id' < 100` becomes false
1818
- in such case we can exit from iteration
1919
]]
20-
local function is_early_exit_possible(index, tarantool_iter, condition)
21-
if index == nil then
22-
return false
23-
end
24-
25-
if index.name ~= condition.operand then
20+
local function is_early_exit_possible(scan_index, tarantool_iter, condition)
21+
if scan_index.name ~= condition.operand then
2622
return false
2723
end
2824

@@ -77,8 +73,8 @@ local function get_values_opts(index)
7773
return index_field_opts
7874
end
7975

80-
local function parse(space, conditions, opts)
81-
dev_checks('table', '?table', {
76+
local function parse(space, scan_index, conditions, opts)
77+
dev_checks('table', 'table', '?table', {
8278
scan_condition_num = '?number',
8379
tarantool_iter = 'number',
8480
})
@@ -137,7 +133,11 @@ local function parse(space, conditions, opts)
137133
operator = condition.operator,
138134
values = condition.values,
139135
types = fields_types,
140-
early_exit_is_possible = is_early_exit_possible(index, opts.tarantool_iter, condition),
136+
early_exit_is_possible = is_early_exit_possible(
137+
scan_index,
138+
opts.tarantool_iter,
139+
condition
140+
),
141141
values_opts = values_opts,
142142
})
143143
end
@@ -645,13 +645,13 @@ local function compile(filter_code)
645645
return func
646646
end
647647

648-
function filters.gen_func(space, conditions, opts)
649-
dev_checks('table', '?table', {
648+
function filters.gen_func(space, scan_index, conditions, opts)
649+
dev_checks('table', 'table', '?table', {
650650
tarantool_iter = 'number',
651651
scan_condition_num = '?number',
652652
})
653653

654-
local filter_conditions, err = parse(space, conditions, {
654+
local filter_conditions, err = parse(space, scan_index, conditions, {
655655
scan_condition_num = opts.scan_condition_num,
656656
tarantool_iter = opts.tarantool_iter,
657657
})

crud/count.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ local function count_on_storage(space_name, index_id, conditions, opts)
5252

5353
local value = opts.scan_value
5454

55-
local filter_func, err = filters.gen_func(space, conditions, {
55+
local filter_func, err = filters.gen_func(space, index, conditions, {
5656
tarantool_iter = opts.tarantool_iter,
5757
scan_condition_num = opts.scan_condition_num,
5858
})

crud/readview.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ local function select_readview_on_storage(space_name, index_id, conditions, opts
160160
return nil, err
161161
end
162162

163-
local filter_func, err = select_filters.gen_func(space, conditions, {
163+
local filter_func, err = select_filters.gen_func(space, index, conditions, {
164164
tarantool_iter = opts.tarantool_iter,
165165
scan_condition_num = opts.scan_condition_num,
166166
})

crud/select.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ local function select_on_storage(space_name, index_id, conditions, opts)
7878
return nil, err
7979
end
8080

81-
local filter_func, err = select_filters.gen_func(space, conditions, {
81+
local filter_func, err = select_filters.gen_func(space, index, conditions, {
8282
tarantool_iter = opts.tarantool_iter,
8383
scan_condition_num = opts.scan_condition_num,
8484
})

test/entrypoint/srv_select/storage_init.lua

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,39 @@ return function()
192192
if_not_exists = true,
193193
})
194194
end
195+
196+
local logins_space = box.schema.space.create('logins', {
197+
format = {
198+
{name = 'id', type = 'unsigned'},
199+
{name = 'bucket_id', type = 'unsigned'},
200+
{name = 'city', type = 'string'},
201+
{name = 'name', type = 'string'},
202+
{name = 'last_login', type = 'number'},
203+
},
204+
if_not_exists = true,
205+
engine = engine,
206+
})
207+
208+
logins_space:create_index('id_index', {
209+
parts = { 'id' },
210+
if_not_exists = true,
211+
})
212+
213+
logins_space:create_index('bucket_id', {
214+
parts = { 'bucket_id' },
215+
unique = false,
216+
if_not_exists = true,
217+
})
218+
219+
logins_space:create_index('city', {
220+
parts = { 'city' },
221+
unique = false,
222+
if_not_exists = true,
223+
})
224+
225+
logins_space:create_index('last_login', {
226+
parts = { 'last_login' },
227+
unique = false,
228+
if_not_exists = true,
229+
})
195230
end

test/helper.lua

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,4 +944,18 @@ function helpers.assert_str_contains_pattern_with_replicaset_id(str, pattern)
944944
t.assert(found, ("string %q does not contain pattern %q"):format(str, pattern))
945945
end
946946

947+
function helpers.prepare_ordered_data(g, space, expected_objects, bucket_id, order_condition)
948+
helpers.insert_objects(g, space, expected_objects)
949+
950+
local resp, err = g.cluster.main_server:call('crud.select', {
951+
space,
952+
{order_condition},
953+
{bucket_id = bucket_id, mode = 'write'},
954+
})
955+
t.assert_equals(err, nil)
956+
957+
local objects = crud.unflatten_rows(resp.rows, resp.metadata)
958+
t.assert_equals(objects, expected_objects)
959+
end
960+
947961
return helpers

test/integration/pairs_readview_test.lua

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ local t = require('luatest')
33
local crud_utils = require('crud.common.utils')
44

55
local helpers = require('test.helper')
6+
local read_scenario = require('test.integration.read_scenario')
67

78
local pgroup = t.group('pairs_readview', helpers.backend_matrix({
89
{engine = 'memtx'},
@@ -880,3 +881,28 @@ pgroup.test_pairs_no_map_reduce = function(g)
880881
local diff_2 = map_reduces_after_2 - map_reduces_after_1
881882
t.assert_equals(diff_2, 0, 'Select request was not a map reduce')
882883
end
884+
885+
pgroup.test_gh_418_pairs_with_secondary_noneq_index_condition = function(g)
886+
local read = function(cg, space, conditions, opts)
887+
opts = table.deepcopy(opts) or {}
888+
opts.use_tomap = true
889+
890+
return cg.cluster.main_server:exec(function(space, conditions, opts)
891+
local crud = require('crud')
892+
893+
local rv, err = crud.readview()
894+
t.assert_equals(err, nil)
895+
896+
local status, resp = pcall(function()
897+
return rv:pairs(space, conditions, opts):totable()
898+
end)
899+
t.assert(status, resp)
900+
901+
rv:close()
902+
903+
return resp, nil
904+
end, {space, conditions, opts})
905+
end
906+
907+
read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read)
908+
end

test/integration/pairs_test.lua

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ local t = require('luatest')
33
local crud_utils = require('crud.common.utils')
44

55
local helpers = require('test.helper')
6+
local read_scenario = require('test.integration.read_scenario')
67

78
local pgroup = t.group('pairs', helpers.backend_matrix({
89
{engine = 'memtx'},
@@ -891,3 +892,24 @@ pgroup.test_pairs_no_map_reduce = function(g)
891892
local diff_2 = map_reduces_after_2 - map_reduces_after_1
892893
t.assert_equals(diff_2, 0, 'Select request was not a map reduce')
893894
end
895+
896+
pgroup.test_gh_418_pairs_with_secondary_noneq_index_condition = function(g)
897+
local read = function(cg, space, conditions, opts)
898+
opts = table.deepcopy(opts) or {}
899+
opts.mode = 'write'
900+
opts.use_tomap = true
901+
902+
return cg.cluster.main_server:exec(function(space, conditions, opts)
903+
local crud = require('crud')
904+
905+
local status, resp = pcall(function()
906+
return crud.pairs(space, conditions, opts):totable()
907+
end)
908+
t.assert(status, resp)
909+
910+
return resp, nil
911+
end, {space, conditions, opts})
912+
end
913+
914+
read_scenario.gh_418_read_with_secondary_noneq_index_condition(g, read)
915+
end

test/integration/read_scenario.lua

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
-- crud.select/crud.pairs/readview:select/readview:pairs
2+
-- have a lot of common scenarios, which are mostly tested with
3+
-- four nearly identical copypasted test functions now.
4+
-- This approach is expected to improve it at least for new test cases.
5+
-- Scenarios here are for `srv_select` entrypoint.
6+
7+
local t = require('luatest')
8+
9+
local helpers = require('test.helper')
10+
11+
local function gh_418_read_with_secondary_noneq_index_condition(cg, read)
12+
-- Pin bucket_id to reproduce issue on a single storage.
13+
local PINNED_BUCKET_NO = 1
14+
15+
local expected_objects = {
16+
{
17+
id = 1,
18+
bucket_id = PINNED_BUCKET_NO,
19+
city = 'Tatsumi Port Island',
20+
name = 'Yukari',
21+
last_login = 42,
22+
},
23+
{
24+
id = 2,
25+
bucket_id = PINNED_BUCKET_NO,
26+
city = 'Tatsumi Port Island',
27+
name = 'Junpei',
28+
last_login = 52,
29+
},
30+
{
31+
id = 3,
32+
bucket_id = PINNED_BUCKET_NO,
33+
city = 'Tatsumi Port Island',
34+
name = 'Mitsuru',
35+
last_login = 42,
36+
},
37+
}
38+
39+
helpers.prepare_ordered_data(cg,
40+
'logins',
41+
expected_objects,
42+
PINNED_BUCKET_NO,
43+
{'=', 'city', 'Tatsumi Port Island'}
44+
)
45+
46+
-- Issue https://github.com/tarantool/crud/issues/418 is as follows:
47+
-- storage iterator exits early on the second tuple because
48+
-- iterator had erroneously expected tuples to be sorted by `last_login`
49+
-- index while iterating on `city` index. Before the issue had beed fixed,
50+
-- user had received only one record instead of two.
51+
local objects = read(cg,
52+
'logins',
53+
{{'=', 'city', 'Tatsumi Port Island'}, {'<=', 'last_login', 42}},
54+
{bucket_id = PINNED_BUCKET_NO}
55+
)
56+
57+
t.assert_equals(objects, {expected_objects[1], expected_objects[3]})
58+
end
59+
60+
return {
61+
gh_418_read_with_secondary_noneq_index_condition = gh_418_read_with_secondary_noneq_index_condition,
62+
}

0 commit comments

Comments
 (0)