Skip to content

Commit a51ca25

Browse files
authored
Merge branch 'master' into recycle-bin-fix
2 parents 3522f98 + 6384827 commit a51ca25

File tree

53 files changed

+283
-121
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+283
-121
lines changed

be/src/cloud/cloud_stream_load_executor.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ Status CloudStreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) {
104104

105105
Status CloudStreamLoadExecutor::commit_txn(StreamLoadContext* ctx) {
106106
DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.block", DBUG_BLOCK);
107+
DBUG_EXECUTE_IF("StreamLoadExecutor.commit_txn.crash", {
108+
LOG(INFO) << "debug point " << DP_NAME << " trigger crash";
109+
volatile int* p = nullptr;
110+
*p = 1;
111+
});
107112
// forward to fe to excute commit transaction for MoW table
108113
if (ctx->is_mow_table() || !config::enable_stream_load_commit_txn_on_be ||
109114
ctx->load_type == TLoadType::ROUTINE_LOAD) {

be/src/cloud/cloud_tablet_mgr.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "common/status.h"
3030
#include "olap/lru_cache.h"
3131
#include "runtime/memory/cache_policy.h"
32+
#include "util/debug_points.h"
3233
#include "util/stack_util.h"
3334

3435
namespace doris {
@@ -148,7 +149,9 @@ CloudTabletMgr::CloudTabletMgr(CloudStorageEngine& engine)
148149
_tablet_map(std::make_unique<TabletMap>()),
149150
_cache(std::make_unique<LRUCachePolicy>(
150151
CachePolicy::CacheType::CLOUD_TABLET_CACHE, config::tablet_cache_capacity,
151-
LRUCacheType::NUMBER, 0, config::tablet_cache_shards, false /*enable_prune*/)) {}
152+
LRUCacheType::NUMBER, /*sweep time*/ 0, config::tablet_cache_shards,
153+
/*element_count_capacity*/ 0, /*enable_prune*/ false,
154+
/*is_lru_k*/ false)) {}
152155

153156
CloudTabletMgr::~CloudTabletMgr() = default;
154157

@@ -163,6 +166,7 @@ Result<std::shared_ptr<CloudTablet>> CloudTabletMgr::get_tablet(int64_t tablet_i
163166
SyncRowsetStats* sync_stats,
164167
bool force_use_only_cached,
165168
bool cache_on_miss) {
169+
DBUG_EXECUTE_IF("CloudTabletMgr::get_tablet.block", DBUG_BLOCK);
166170
// LRU value type. `Value`'s lifetime MUST NOT be longer than `CloudTabletMgr`
167171
class Value : public LRUCacheValueBase {
168172
public:

be/src/cloud/cloud_txn_delete_bitmap_cache.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ namespace doris {
3434

3535
CloudTxnDeleteBitmapCache::CloudTxnDeleteBitmapCache(size_t size_in_bytes)
3636
: LRUCachePolicy(CachePolicy::CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE, size_in_bytes,
37-
LRUCacheType::SIZE, 86400, 4),
37+
LRUCacheType::SIZE, /*stale_sweep_time_s*/ 86400, /*num_shards*/ 4,
38+
/*element_count_capacity*/ 0, /*enable_prune*/ true,
39+
/*is_lru_k*/ false),
3840
_stop_latch(1) {}
3941

4042
CloudTxnDeleteBitmapCache::~CloudTxnDeleteBitmapCache() {

be/src/cloud/injection_point_action.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,17 @@ void set_return_error(const std::string& point, HttpRequest* req) {
279279
HttpChannel::send_reply(req, HttpStatus::OK, "OK");
280280
}
281281

282+
void set_segfault(const std::string& point, HttpRequest* req) {
283+
auto sp = SyncPoint::get_instance();
284+
sp->set_call_back(point, [point](auto&&) {
285+
LOG(INFO) << "injection point hit, point=" << point << " trigger segfault";
286+
// Intentional null dereference to crash the BE for testing.
287+
volatile int* p = nullptr;
288+
*p = 1;
289+
});
290+
HttpChannel::send_reply(req, HttpStatus::OK, "OK");
291+
}
292+
282293
void handle_set(HttpRequest* req) {
283294
auto& point = req->param("name");
284295
if (point.empty()) {
@@ -302,6 +313,9 @@ void handle_set(HttpRequest* req) {
302313
} else if (behavior == "return_error") {
303314
set_return_error(point, req);
304315
return;
316+
} else if (behavior == "segfault") {
317+
set_segfault(point, req);
318+
return;
305319
}
306320
HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, "unknown behavior: " + behavior);
307321
}
@@ -377,13 +391,15 @@ InjectionPointAction::InjectionPointAction() = default;
377391
// which is an int, valid values can be found in status.h, e.g. -235 or -230,
378392
// if `code` is not present return Status::InternalError. Optional `probability`
379393
// determines the percentage of times to inject the error (default 100).
394+
// * segfault: dereference a null pointer to crash BE intentionally
380395
// ```
381396
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=sleep&duration=${x_millsec}" # sleep x millisecs
382397
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return" # return void
383398
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_ok" # return ok
384399
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error" # internal error
385400
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error&code=${code}" # -235
386401
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=return_error&code=${code}&probability=50" # inject with 50% probability
402+
// curl "be_ip:http_port/api/injection_point/set?name=${injection_point_name}&behavior=segfault" # crash BE
387403
// ```
388404
void InjectionPointAction::handle(HttpRequest* req) {
389405
LOG(INFO) << "handle InjectionPointAction " << req->debug_string();

be/src/exec/schema_scanner/schema_file_cache_info_scanner.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ namespace doris {
3030
std::vector<SchemaScanner::ColumnDesc> SchemaFileCacheInfoScanner::_s_tbls_columns = {
3131
// name, type, size, is_null
3232
{"HASH", TYPE_STRING, sizeof(StringRef), true},
33+
{"OFFSET", TYPE_BIGINT, sizeof(int64_t), true},
3334
{"TABLET_ID", TYPE_BIGINT, sizeof(int64_t), true},
3435
{"SIZE", TYPE_BIGINT, sizeof(int64_t), true},
3536
{"TYPE", TYPE_STRING, sizeof(StringRef), true},
@@ -68,7 +69,7 @@ Status SchemaFileCacheInfoScanner::_fill_block_impl(vectorized::Block* block) {
6869
}
6970

7071
// Collect all cache entries from all file cache instances
71-
std::vector<std::tuple<std::string, int64_t, int64_t, int, std::string>> cache_entries;
72+
std::vector<std::tuple<std::string, int64_t, int64_t, int64_t, int, std::string>> cache_entries;
7273

7374
// Get all cache instances using the public getter
7475
const auto& caches = file_cache_factory->get_caches();
@@ -116,7 +117,8 @@ Status SchemaFileCacheInfoScanner::_fill_block_impl(vectorized::Block* block) {
116117
std::string hash_str = key.hash.to_string();
117118

118119
// Add to cache entries
119-
cache_entries.emplace_back(hash_str, key.tablet_id, value.size, value.type, cache_path);
120+
cache_entries.emplace_back(hash_str, static_cast<int64_t>(key.offset), key.tablet_id,
121+
static_cast<int64_t>(value.size), value.type, cache_path);
120122

121123
iterator->next();
122124
}
@@ -137,21 +139,21 @@ Status SchemaFileCacheInfoScanner::_fill_block_impl(vectorized::Block* block) {
137139

138140
for (size_t row_idx = 0; row_idx < row_num; ++row_idx) {
139141
const auto& entry = cache_entries[row_idx];
140-
const auto& [hash, tablet_id, size, type, cache_path] = entry;
142+
const auto& [hash, offset, tablet_id, size, type, cache_path] = entry;
141143

142144
if (col_desc.type == TYPE_STRING) {
143145
switch (col_idx) {
144146
case 0: // HASH
145147
column_values[row_idx] = hash;
146148
break;
147-
case 3: // TYPE
149+
case 4: // TYPE
148150
column_values[row_idx] = doris::io::cache_type_to_string(
149151
static_cast<doris::io::FileCacheType>(type));
150152
break;
151-
case 4: // REMOTE_PATH
153+
case 5: // REMOTE_PATH
152154
column_values[row_idx] = ""; // TODO: Implement remote path retrieval
153155
break;
154-
case 5: // CACHE_PATH
156+
case 6: // CACHE_PATH
155157
column_values[row_idx] = cache_path;
156158
break;
157159
default:
@@ -163,13 +165,16 @@ Status SchemaFileCacheInfoScanner::_fill_block_impl(vectorized::Block* block) {
163165
datas[row_idx] = &str_refs[row_idx];
164166
} else if (col_desc.type == TYPE_BIGINT) {
165167
switch (col_idx) {
166-
case 1: // TABLET_ID
168+
case 1: // OFFSET
169+
int64_vals[row_idx] = offset;
170+
break;
171+
case 2: // TABLET_ID
167172
int64_vals[row_idx] = tablet_id;
168173
break;
169-
case 2: // SIZE
174+
case 3: // SIZE
170175
int64_vals[row_idx] = size;
171176
break;
172-
case 6: // BE_ID
177+
case 7: // BE_ID
173178
int64_vals[row_idx] = ExecEnv::GetInstance()->cluster_info()->backend_id;
174179
break;
175180
default:

be/src/olap/page_cache.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,24 +118,27 @@ class StoragePageCache {
118118
DataPageCache(size_t capacity, uint32_t num_shards)
119119
: LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
120120
LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
121-
num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
122-
}
121+
num_shards, /*element_count_capacity*/ 0, /*enable_prune*/ true,
122+
/*is lru-k*/ true) {}
123123
};
124124

125125
class IndexPageCache : public LRUCachePolicy {
126126
public:
127127
IndexPageCache(size_t capacity, uint32_t num_shards)
128128
: LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity,
129129
LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec,
130-
num_shards) {}
130+
num_shards, /*element_count_capacity*/ 0, /*enable_prune*/ true,
131+
/*is lru-k*/ false) {}
131132
};
132133

133134
class PKIndexPageCache : public LRUCachePolicy {
134135
public:
135136
PKIndexPageCache(size_t capacity, uint32_t num_shards)
136137
: LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity,
137138
LRUCacheType::SIZE,
138-
config::pk_index_page_cache_stale_sweep_time_sec, num_shards) {}
139+
config::pk_index_page_cache_stale_sweep_time_sec, num_shards,
140+
/*element_count_capacity*/ 0, /*enable_prune*/ true,
141+
/*is lru-k*/ false) {}
139142
};
140143

141144
static constexpr uint32_t kDefaultNumShards = 16;

be/src/olap/rowset/segment_v2/condition_cache.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@ class ConditionCache : public LRUCachePolicy {
8383

8484
ConditionCache(size_t capacity, uint32_t num_shards)
8585
: LRUCachePolicy(CachePolicy::CacheType::CONDITION_CACHE, capacity, LRUCacheType::SIZE,
86-
config::inverted_index_cache_stale_sweep_time_sec, num_shards) {}
86+
config::inverted_index_cache_stale_sweep_time_sec, num_shards,
87+
/*element_count_capacity*/ 0, /*enable_prune*/ true,
88+
/*is_lru_k*/ true) {}
8789

8890
bool lookup(const CacheKey& key, ConditionCacheHandle* handle);
8991

be/src/olap/rowset/segment_v2/inverted_index_cache.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -106,16 +106,17 @@ class InvertedIndexSearcherCache {
106106
: LRUCachePolicy(CachePolicy::CacheType::INVERTEDINDEX_SEARCHER_CACHE, capacity,
107107
LRUCacheType::SIZE,
108108
config::inverted_index_cache_stale_sweep_time_sec, num_shards,
109-
element_count_capacity, true) {}
109+
element_count_capacity, /*enable_prune*/ true,
110+
/*is lru k*/ false) {}
110111
InvertedIndexSearcherCachePolicy(size_t capacity, uint32_t num_shards,
111112
uint32_t element_count_capacity,
112113
CacheValueTimeExtractor cache_value_time_extractor,
113114
bool cache_value_check_timestamp)
114-
: LRUCachePolicy(CachePolicy::CacheType::INVERTEDINDEX_SEARCHER_CACHE, capacity,
115-
LRUCacheType::SIZE,
116-
config::inverted_index_cache_stale_sweep_time_sec, num_shards,
117-
element_count_capacity, cache_value_time_extractor,
118-
cache_value_check_timestamp, true) {}
115+
: LRUCachePolicy(
116+
CachePolicy::CacheType::INVERTEDINDEX_SEARCHER_CACHE, capacity,
117+
LRUCacheType::SIZE, config::inverted_index_cache_stale_sweep_time_sec,
118+
num_shards, element_count_capacity, cache_value_time_extractor,
119+
cache_value_check_timestamp, /*enable_prune*/ true, /*is lru k*/ false) {}
119120
};
120121
// Insert a cache entry by key.
121122
// And the cache entry will be returned in handle.
@@ -229,7 +230,9 @@ class InvertedIndexQueryCache : public LRUCachePolicy {
229230
InvertedIndexQueryCache(size_t capacity, uint32_t num_shards)
230231
: LRUCachePolicy(CachePolicy::CacheType::INVERTEDINDEX_QUERY_CACHE, capacity,
231232
LRUCacheType::SIZE, config::inverted_index_cache_stale_sweep_time_sec,
232-
num_shards) {}
233+
num_shards,
234+
/*element_count_capacity*/ 0, /*enable_prune*/ true,
235+
/*is_lru_k*/ true) {}
233236

234237
bool lookup(const CacheKey& key, InvertedIndexQueryCacheHandle* handle);
235238

be/src/olap/schema_cache.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,9 @@ class SchemaCache : public LRUCachePolicy {
8787

8888
SchemaCache(size_t capacity)
8989
: LRUCachePolicy(CachePolicy::CacheType::SCHEMA_CACHE, capacity, LRUCacheType::NUMBER,
90-
config::schema_cache_sweep_time_sec) {}
90+
config::schema_cache_sweep_time_sec, /*num shards*/ 32,
91+
/*element_count_capacity*/ 0, /*enable_prune*/ true,
92+
/*is lru-k*/ false) {}
9193

9294
private:
9395
static constexpr char SCHEMA_DELIMITER = '-';

be/src/olap/segment_loader.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,11 @@ class SegmentCache : public LRUCachePolicy {
8383
};
8484

8585
SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit)
86-
: LRUCachePolicy(
87-
CachePolicy::CacheType::SEGMENT_CACHE, memory_bytes_limit, LRUCacheType::SIZE,
88-
config::tablet_rowset_stale_sweep_time_sec, DEFAULT_LRU_CACHE_NUM_SHARDS * 2,
89-
cast_set<uint32_t>(segment_num_limit), config::enable_segment_cache_prune) {}
86+
: LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, memory_bytes_limit,
87+
LRUCacheType::SIZE, config::tablet_rowset_stale_sweep_time_sec,
88+
/*num shards*/ 64,
89+
/*element count capacity */ cast_set<uint32_t>(segment_num_limit),
90+
config::enable_segment_cache_prune, /*is lru-k*/ true) {}
9091

9192
// Lookup the given segment in the cache.
9293
// If the segment is found, the cache entry will be written into handle.

0 commit comments

Comments
 (0)