Skip to content

Commit 93153a7

Browse files
zzzxl1993 authored and Your Name committed
[fix](profile) ensure file cache profile stats output in cloud mode (#57464)
Introduced by #52118. 1. Remove the `query_options().enable_file_cache` check in `olap_scanner.cpp` when reporting file cache profile statistics. This check should only apply to external table scanners (FileScanner).
1 parent d8663df commit 93153a7

File tree

2 files changed

+143
-3
lines changed

2 files changed

+143
-3
lines changed

be/src/vec/exec/scan/olap_scanner.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -769,9 +769,8 @@ void OlapScanner::_collect_profile_before_close() {
769769
inverted_index_profile.update(local_state->_index_filter_profile.get(),
770770
&stats.inverted_index_stats);
771771

772-
// only cloud deploy mode will use file cache. and keep the same with FileScanner
773-
if (config::is_cloud_mode() && config::enable_file_cache &&
774-
_state->query_options().enable_file_cache) {
772+
// only cloud deploy mode will use file cache.
773+
if (config::is_cloud_mode() && config::enable_file_cache) {
775774
io::FileCacheProfileReporter cache_profile(local_state->_segment_profile.get());
776775
cache_profile.update(&stats.file_cache_stats);
777776
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
import java.util.regex.Pattern
19+
20+
suite('test_inverted_index_io_timer', 'p0') {
21+
if (!isCloudMode()) {
22+
return;
23+
}
24+
25+
def indexTbName1 = "test_inverted_index_io_timer_tbl"
26+
27+
sql "DROP TABLE IF EXISTS ${indexTbName1}"
28+
29+
// Create table with inverted index using httplogs schema
30+
sql """
31+
CREATE TABLE ${indexTbName1} (
32+
`@timestamp` int(11) NULL COMMENT "",
33+
`clientip` varchar(20) NULL COMMENT "",
34+
`request` text NULL COMMENT "",
35+
`status` int(11) NULL COMMENT "",
36+
`size` int(11) NULL COMMENT "",
37+
INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
38+
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
39+
INDEX status_idx (`status`) USING INVERTED COMMENT '',
40+
INDEX size_idx (`size`) USING INVERTED COMMENT ''
41+
) ENGINE=OLAP
42+
DUPLICATE KEY(`@timestamp`)
43+
COMMENT "OLAP"
44+
DISTRIBUTED BY RANDOM BUCKETS 1
45+
PROPERTIES (
46+
"replication_allocation" = "tag.location.default: 1"
47+
);
48+
"""
49+
50+
// Define data loading function
51+
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
52+
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
53+
54+
// load the json data
55+
streamLoad {
56+
table "${table_name}"
57+
58+
// set http request header params
59+
set 'label', label + "_" + UUID.randomUUID().toString()
60+
set 'read_json_by_line', read_flag
61+
set 'format', format_flag
62+
file file_name // import json file
63+
time 10000 // limit inflight 10s
64+
if (expected_succ_rows >= 0) {
65+
set 'max_filter_ratio', '1'
66+
}
67+
68+
// If a check callback is declared, the default check conditions are skipped.
69+
// So you must check all conditions yourself.
70+
check { result, exception, startTime, endTime ->
71+
if (ignore_failure && expected_succ_rows < 0) { return }
72+
if (exception != null) {
73+
throw exception
74+
}
75+
log.info("Stream load result: ${result}".toString())
76+
def json = parseJson(result)
77+
assertEquals("success", json.Status.toLowerCase())
78+
if (expected_succ_rows >= 0) {
79+
assertEquals(json.NumberLoadedRows, expected_succ_rows)
80+
} else {
81+
assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
82+
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
83+
}
84+
}
85+
}
86+
}
87+
88+
try {
89+
// Load 1000 documents
90+
load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json')
91+
92+
sql "sync"
93+
94+
// Enable profile
95+
sql """ set enable_profile = true; """
96+
sql """ set profile_level = 2; """
97+
sql """ set enable_sql_cache = false; """
98+
sql """ set enable_inverted_index_searcher_cache = false; """
99+
sql """ set enable_inverted_index_query_cache = false; """
100+
sql """ set enable_common_expr_pushdown = true; """
101+
sql """ set enable_common_expr_pushdown_for_inverted_index = true; """
102+
sql """ set enable_match_without_inverted_index = false; """
103+
104+
// Execute query with inverted index using profile
105+
def queryId = "test_inverted_index_io_timer_${System.currentTimeMillis()}"
106+
profile("${queryId}") {
107+
run {
108+
sql "/* ${queryId} */ select * from ${indexTbName1} where request match 'images' order by `@timestamp` limit 10"
109+
}
110+
111+
check { profileString, exception ->
112+
def local = 0
113+
def remote = 0
114+
115+
def localMatcher = Pattern.compile("InvertedIndexNumLocalIOTotal:\\s*(\\d+)").matcher(profileString)
116+
if (localMatcher.find()) {
117+
local = Integer.parseInt(localMatcher.group(1))
118+
log.info("InvertedIndexNumLocalIOTotal: {}", local)
119+
}
120+
121+
def remoteMatcher = Pattern.compile("InvertedIndexNumRemoteIOTotal:\\s*(\\d+)").matcher(profileString)
122+
if (remoteMatcher.find()) {
123+
remote = Integer.parseInt(remoteMatcher.group(1))
124+
log.info("InvertedIndexNumRemoteIOTotal: {}", remote)
125+
}
126+
127+
def total = local + remote
128+
assertTrue(total > 0, "InvertedIndexNumLocalIOTotal + InvertedIndexNumRemoteIOTotal should be > 0, got: ${total} (local=${local}, remote=${remote})")
129+
}
130+
}
131+
132+
// Also verify the query returns correct result
133+
def result = sql "select count(*) from ${indexTbName1} where request match 'images'"
134+
assertTrue(result[0][0] > 0, "Should have at least one row matching 'images'")
135+
136+
log.info("Test completed successfully: InvertedIndexIOTimer is greater than 0")
137+
} finally {
138+
// Clean up
139+
// sql "DROP TABLE IF EXISTS ${indexTbName1}"
140+
}
141+
}

0 commit comments

Comments
 (0)