Skip to content

Commit eececa2

Browse files
committed
add monitor wait and bpftime is_lock
1 parent 7c5ba12 commit eececa2

22 files changed

+345
-168
lines changed

artifact/mlc-cxlmemsim.txt

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
Intel(R) Memory Latency Checker - v3.11b
2+
Measuring idle latencies for sequential access (in ns)...
3+
Numa node
4+
Numa node 0
5+
0 292.9
6+
7+
Measuring Peak Injection Memory Bandwidths for the system
8+
Bandwidths are in MB/sec (1 MB/sec = 1,000,000 Bytes/sec)
9+
Using all the threads from each core if Hyper-threading is enabled
10+
Using traffic with the following read-write ratios
11+
ALL Reads : 31031.7
12+
3:1 Reads-Writes : 43700.9
13+
2:1 Reads-Writes : 42475.2
14+
1:1 Reads-Writes : 45973.2
15+
Stream-triad like: 28339.2
16+
17+
Measuring Memory Bandwidths between nodes within system
18+
Bandwidths are in MB/sec (1 MB/sec = 1,000,000 Bytes/sec)
19+
Using all the threads from each core if Hyper-threading is enabled
20+
Using Read-only traffic type
21+
Numa node
22+
Numa node 0
23+
0 50459.9
24+
25+
Measuring Loaded Latencies for the system
26+
Using all the threads from each core if Hyper-threading is enabled
27+
Using Read-only traffic type
28+
Inject Latency Bandwidth
29+
Delay (ns) MB/sec
30+
==========================
31+
00000 708.06 50473.7
32+
00002 713.23 50408.7
33+
00008 718.64 50425.6
34+
00015 696.64 50381.3
35+
00050 637.99 50585.4
36+
00100 586.28 50597.8
37+
00200 567.09 50641.2
38+
00300 512.45 50732.0
39+
00400 469.48 50550.7
40+
00500 317.21 44991.4
41+
00700 216.09 33815.6
42+
01000 188.95 24427.9
43+
01300 178.91 19323.6
44+
01700 175.15 15074.7
45+
02500 168.75 10480.7
46+
03500 164.78 7691.6
47+
05000 161.10 5543.5
48+
09000 155.73 3291.7
49+
20000 174.09 1668.7
50+
51+
Measuring cache-to-cache transfer latency (in ns)...
52+
Local Socket L2->L2 HIT latency 65.4
53+
Local Socket L2->L2 HITM latency 78.6
54+
55+
Exiting...
56+
57+
58+
========== Process 0[tgid=1003083, tid=1003083] statistics summary ==========
59+
emulated time =264.9391297
60+
total delay =90.98129678699999
61+
PEBS sample total 692 267577
62+
LBR sample total 10
63+
bpftime sample total 112362
64+
CXLController:
65+
Total system memory capacity: 60GB
66+
Page Type: PAGE
67+
Global Counter:
68+
Local: 0
69+
Remote: 611
70+
HITM: 552085377
71+
Topology:
72+
Switch:
73+
Events:
74+
Load: 0
75+
Store: 0
76+
Conflict: 15
77+
Switch:
78+
Events:
79+
Load: 0
80+
Store: 408
81+
Conflict: 57
82+
Expander:
83+
Events:
84+
Load: 0
85+
Store: 204
86+
Migrate in: 0
87+
Migrate out: 0
88+
Hit Old: 0
89+
Expander:
90+
Events:
91+
Load: 0
92+
Store: 204
93+
Migrate in: 0
94+
Migrate out: 0
95+
Hit Old: 0
96+
Expander:
97+
Events:
98+
Load: 0
99+
Store: 203
100+
Migrate in: 0
101+
Migrate out: 0
102+
Hit Old: 0
103+
104+
Statistics:
105+
Number of Switches: 2
106+
Number of Endpoints: 3
107+
Number of Threads created: 1
108+
Memory Freed: 1011780 bytes
109+

include/bpftimeruntime.h

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,48 @@
1717
#include <linux/bpf.h>
1818
#include <string>
1919
#include <sys/types.h>
20+
#include "bpftime_config.hpp"
21+
#include "bpftime_logger.hpp"
22+
#include "bpftime_shm.hpp"
23+
template <typename K, typename V>
24+
class BPFUpdater {
25+
public:
26+
int map_fd;
27+
BPFUpdater(int map_fd) : map_fd(map_fd) {}
28+
29+
void update(K key, V value) {
30+
int key1 = 0;
31+
bpftime_map_get_next_key(map_fd, &key1, &key); // process map
32+
auto item2 = (struct proc_info *)bpftime_map_lookup_elem(map_fd, &key); // allocs map
33+
item2->sleep_time = (uint64_t)value;
34+
int ret = bpftime_map_update_elem(map_fd, &key, item2, BPF_ANY);
35+
if (ret != 0) {
36+
SPDLOG_ERROR("Error updating map: {}\n", strerror(errno));
37+
throw std::runtime_error("Error updating the bpf map");
38+
}
39+
}
2040

41+
bool get(K key) {
42+
int key1 = 0;
43+
bpftime_map_get_next_key(map_fd, &key1, &key); // process map
44+
auto item2 = (struct proc_info *)bpftime_map_lookup_elem(map_fd, &key); // allocs map
45+
if (item2 == nullptr) {
46+
return false;
47+
}
48+
return item2->is_locked;
49+
}
50+
};
2151
class BpfTimeRuntime {
2252
public:
2353
BpfTimeRuntime(pid_t, std::string);
2454
~BpfTimeRuntime();
2555

2656
int read(CXLController *, BPFTimeRuntimeElem *);
27-
57+
BPFUpdater<uint64_t,uint64_t> *updater;
2858
pid_t tid;
2959
};
60+
61+
3062
#define u64 unsigned long long
3163
#define u32 unsigned int
3264
#else
@@ -62,6 +94,7 @@ struct proc_info {
6294
u64 current_pid; // 当前进程 ID
6395
u64 current_tid; // 当前线程 ID
6496
u64 sleep_time; // 睡眠时间
97+
bool is_locked; // 是否锁定
6598
struct mem_info mem_info;
6699
};
67100

include/cxlcontroller.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class MigrationPolicy : public Policy {
6666
class PagingPolicy : public Policy {
6767
public:
6868
PagingPolicy();
69-
int compute_once(CXLController *) override{};
69+
// Override that does no work and always reports 0; the CXLController argument is unnamed and unused.
int compute_once(CXLController *) override {
    return 0;
}
7070
// paging related
7171
virtual uint64_t check_page_table_walk(uint64_t virt_addr, uint64_t phys_addr, bool is_remote, page_type pt) {
7272
return 0;

include/cxlendpoint.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ class CXLSwitch : public CXLEndPoint {
157157
// TODO get the approximate congestion and target done time
158158
std::unordered_map<uint64_t, uint64_t> timeseries_map;
159159

160-
double congestion_latency = 90; // 200ns is the latency of the switch
160+
// NOTE(review): the previous trailing comment read "200ns is the latency of the switch",
// which does not match the initializer 0.02 — confirm the unit and the intended value.
double congestion_latency = 0.02;
161161
explicit CXLSwitch(int id);
162162
std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
163163
double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,

include/lbr.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,9 @@ struct lbr_sample {
3737
perf_event_header header;
3838
uint32_t pid;
3939
uint32_t tid;
40-
// uint64_t nr;
41-
// uint64_t ips[4];
4240
uint64_t timestamp;
4341
uint32_t cpu;
4442
uint64_t nr2;
45-
// uint64_t hw_idx;
4643
lbr lbrs[32];
4744
cntr counters[32];
4845
};

include/monitor.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ class Monitor {
6060
std::mutex wanted_delay_mutex;
6161
timespec injected_delay; // recorded time for injected
6262
timespec wasted_delay; // recorded time for calling between continue and calculation
63-
timespec squabble_delay; // inj-was
63+
// Fixed compile-time interval. With the usual {tv_sec, tv_nsec} member order this is
// 0 s + 10,000,000 ns (10 ms) — assumes the platform's timespec lists tv_sec first.
constexpr static timespec interval_delay = {0,10000000};
64+
static timespec last_delay; // last delay
6465
Elem elem[2]; // before & after
6566
Elem *before, *after;
6667
double total_delay;
@@ -72,7 +73,7 @@ class Monitor {
7273

7374
Monitor(const Monitor &other)
7475
: tgid(other.tgid), tid(other.tid), cpu_core(other.cpu_core), wanted_delay(other.wanted_delay),
75-
injected_delay(other.injected_delay), squabble_delay(other.squabble_delay),
76+
injected_delay(other.injected_delay),
7677
before(nullptr), // Will be set after copying elements
7778
after(nullptr), // Will be set after copying elements
7879
total_delay(other.total_delay), start_exec_ts(other.start_exec_ts), end_exec_ts(other.end_exec_ts),

include/policy.h

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class HeatAwareMigrationPolicy : public MigrationPolicy {
9090
};
9191

9292
class HugePagePolicy : public PagingPolicy {
93-
private:
93+
public:
9494
// 页表遍历延迟基准值(纳秒)
9595
uint64_t ptw_base_latency_local; // 本地内存页表遍历基准延迟
9696
uint64_t ptw_base_latency_remote; // 远程内存页表遍历基准延迟
@@ -145,7 +145,6 @@ class HugePagePolicy : public PagingPolicy {
145145
TLBCache tlb_2m; // 2MB页面的TLB
146146
TLBCache tlb_1g; // 1GB页面的TLB
147147
CXLHugePageEvent stats; // 统计信息
148-
public:
149148
explicit HugePagePolicy(uint64_t local_latency = 100, uint64_t remote_latency = 300)
150149
: ptw_base_latency_local(local_latency), ptw_base_latency_remote(remote_latency),
151150
tlb_4k(64), // 4KB页面TLB容量(较大)
@@ -331,7 +330,7 @@ class HugePagePolicy : public PagingPolicy {
331330
};
332331

333332
class PageTableAwarePolicy : public PagingPolicy {
334-
private:
333+
public:
335334
// 页表缓存,用于追踪已经转换过的虚拟地址
336335
std::unordered_map<uint64_t, uint64_t> va_pa_cache;
337336
// 页表访问延迟(纳秒)
@@ -344,7 +343,6 @@ class PageTableAwarePolicy : public PagingPolicy {
344343
// 清理间隔
345344
uint64_t cleanup_interval;
346345

347-
public:
348346
explicit PageTableAwarePolicy(uint64_t local_latency = 100, uint64_t remote_latency = 300,
349347
uint64_t cleanup_interval = 10000000)
350348
: ptw_latency_local(local_latency), ptw_latency_remote(remote_latency), last_cleanup_timestamp(0),
@@ -425,6 +423,19 @@ class FIFOPolicy : public CachingPolicy {
425423
public:
426424
FIFOPolicy() = default;
427425
int compute_once(CXLController *) override;
426+
// Builds the invalidation list by copying the `address` field of every entry
// found in controller->occupation, in iteration order.
std::vector<uint64_t> get_invalidation_list(CXLController *controller) override {
    std::vector<uint64_t> addresses;
    for (const auto &[ts, entry] : controller->occupation) {
        addresses.push_back(entry.address);
    }
    return addresses;
}
433+
// This override unconditionally answers false; neither addr nor timestamp is consulted.
bool should_cache(uint64_t addr, uint64_t timestamp) override { return false; }
436+
// This override unconditionally answers false; neither addr nor timestamp is consulted.
bool should_invalidate(uint64_t addr, uint64_t timestamp) override { return false; }
428439
};
429440

430441
// 基于访问频率的后向失效策略
@@ -547,12 +558,11 @@ class FrequencyBasedMigrationPolicy : public MigrationPolicy {
547558

548559
// 基于负载平衡的迁移策略
549560
class LoadBalancingMigrationPolicy : public MigrationPolicy {
550-
private:
561+
public:
551562
double imbalance_threshold; // 负载不平衡阈值
552563
uint64_t migration_interval; // 迁移间隔
553564
uint64_t last_migration; // 上次迁移时间
554565

555-
public:
556566
LoadBalancingMigrationPolicy(double threshold = 0.2, uint64_t interval = 5000000)
557567
: imbalance_threshold(threshold), migration_interval(interval), last_migration(0) {}
558568

@@ -706,12 +716,11 @@ class LoadBalancingMigrationPolicy : public MigrationPolicy {
706716

707717
// 基于局部性的迁移策略
708718
class LocalityBasedMigrationPolicy : public MigrationPolicy {
709-
private:
719+
public:
710720
std::unordered_map<uint64_t, std::vector<uint64_t>> page_access_pattern; // 页面访问模式
711721
uint64_t pattern_threshold; // 模式识别阈值
712722
uint64_t page_size; // 页面大小
713723

714-
public:
715724
LocalityBasedMigrationPolicy(uint64_t threshold = 5, uint64_t p_size = 4096)
716725
: pattern_threshold(threshold), page_size(p_size) {}
717726

@@ -780,10 +789,9 @@ class LocalityBasedMigrationPolicy : public MigrationPolicy {
780789

781790
// 基于数据寿命的迁移策略
782791
class LifetimeBasedMigrationPolicy : public MigrationPolicy {
783-
private:
792+
public:
784793
uint64_t lifetime_threshold; // 数据寿命阈值
785794

786-
public:
787795
// Stores the lifetime threshold; callers that pass nothing get 1000000.
LifetimeBasedMigrationPolicy(uint64_t threshold = 1000000)
    : lifetime_threshold{threshold} {}
788796

789797
int compute_once(CXLController *controller) override {
@@ -828,10 +836,9 @@ class LifetimeBasedMigrationPolicy : public MigrationPolicy {
828836

829837
// 混合多策略迁移
830838
class HybridMigrationPolicy : public MigrationPolicy {
831-
private:
839+
public:
832840
std::vector<MigrationPolicy *> policies; // 多个迁移策略
833841

834-
public:
835842
// Default constructor with an empty body; the `policies` vector is default-constructed (empty).
HybridMigrationPolicy() {}
836843

837844
// 添加策略

include/rob.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class Rob {
3434
const size_t maxSize_;
3535
std::deque<InstructionGroup> queue_; // ROB队列
3636
int64_t stallCount_ = 0; // 停顿计数
37+
int64_t stallEventCount_ = 0; // 停顿事件计数
3738
int64_t cur_latency = 0;
3839
int64_t totalLatency_ = 0;
3940
int64_t currentCycle_ = 0; // 当前周期
@@ -46,6 +47,7 @@ class Rob {
4647

4748
// 性能统计
4849
// Read-only accessor for the stallCount_ counter.
int64_t getStallCount() const {
    return stallCount_;
}
50+
// Read-only accessor for the stallEventCount_ counter.
int64_t getStallEventCount() const {
    return stallEventCount_;
}
4951
// Read-only accessor for currentCycle_.
int64_t getCurrentCycle() const {
    return currentCycle_;
}
5052
// Returns 0 when queue_ is empty; otherwise totalLatency_ divided by the number
// of entries currently held in queue_.
double getAverageLatency() const {
    if (queue_.empty()) {
        return 0.0;
    }
    return static_cast<double>(totalLatency_) / queue_.size();
}
5153
};

microbench/ld.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#define STR(x) STR_HELPER(x)
2727

2828
#define MOVE_SIZE 128
29-
#define MAP_SIZE (long)(1024 * 1024 * 1024)
29+
#define MAP_SIZE (long)( 1024)
3030
#define CACHELINE_SIZE 64
3131

3232
#ifndef FENCE_COUNT

microbench/ld_serial.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#define STR(x) STR_HELPER(x)
2828

2929
#define MOVE_SIZE 128
30-
#define MAP_SIZE (long)(1024 * 1024 * 1024)
30+
#define MAP_SIZE (long)(1024)
3131
#define CACHELINE_SIZE 64
3232

3333
#ifndef FENCE_COUNT

0 commit comments

Comments
 (0)