Skip to content

Commit c6e0f93

Browse files
committed
add new rob implementation
1 parent 74584bc commit c6e0f93

File tree

13 files changed

+396
-161
lines changed

13 files changed

+396
-161
lines changed

include/cxlcontroller.h

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include <queue>
1818
#include <string_view>
1919

20-
#define ROB_SIZE 512
2120
class Monitors;
2221
struct mem_stats;
2322
struct proc_info;
@@ -59,7 +58,7 @@ class PagingPolicy : public Policy {
5958
// paging related
6059
};
6160

62-
class CachingPolicy: public Policy {
61+
class CachingPolicy : public Policy {
6362
public:
6463
CachingPolicy();
6564
// paging related
@@ -74,7 +73,7 @@ class CXLController : public CXLSwitch {
7473
PagingPolicy *paging_policy{};
7574
CachingPolicy *caching_policy{};
7675
CXLCounter counter;
77-
std::map<uint64_t, uint64_t> occupation;
76+
std::map<uint64_t, occupation_info> occupation;
7877
page_type page_type_; // percentage
7978
// no need for a va->pa map because v-indexed accesses will not be caught by us
8079
int num_switches = 0;
@@ -87,32 +86,23 @@ class CXLController : public CXLSwitch {
8786
// ring buffer
8887
std::queue<lbr> ring_buffer;
8988
// rob info
90-
typedef struct {
91-
std::map<int,int64_t> m_bandwidth, m_count;
92-
int64_t llcm_base, llcm_count, ins_count;
93-
} rob_info;
94-
typedef struct {
95-
rob_info rob;
96-
std::queue<int> llcm_type;
97-
std::queue<int> llcm_type_rob;
98-
} thread_info;
9989
std::unordered_map<uint64_t, thread_info> thread_map;
10090

101-
explicit CXLController(std::array<Policy*,4> p, int capacity, page_type page_type_, int epoch, double dramlatency);
91+
explicit CXLController(std::array<Policy *, 4> p, int capacity, page_type page_type_, int epoch,
92+
double dramlatency);
10293
void construct_topo(std::string_view newick_tree);
10394
void insert_end_point(CXLMemExpander *end_point);
10495
std::vector<std::string> tokenize(const std::string_view &s);
10596
std::tuple<double, std::vector<uint64_t>> calculate_congestion() override;
10697
void set_epoch(int epoch) override;
107-
std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
108-
double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
98+
std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
99+
double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
109100
double dramlatency) override; // traverse the tree to calculate the latency
110-
double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
101+
double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) override;
111102
void insert_one(thread_info &t_info, lbr &lbr);
112103
int insert(uint64_t timestamp, uint64_t tid, lbr lbrs[32], cntr counters[32]);
113104
int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
114105
void delete_entry(uint64_t addr, uint64_t length) override;
115-
std::string output() override;
116106
void set_stats(mem_stats stats);
117107
static void set_process_info(const proc_info &process_info);
118108
static void set_thread_info(const proc_info &thread_info);

include/cxlendpoint.h

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,28 @@
1515
#include "cxlcounter.h"
1616
#include "helper.h"
1717
#include <list>
18+
#include <queue>
1819
#include <map>
1920
#include <string>
2021
#include <tuple>
2122
#include <unordered_map>
2223
#include <vector>
24+
#define ROB_SIZE 512
2325

26+
struct occupation_info {
27+
uint64_t timestamp;
28+
uint64_t address;
29+
uint64_t access_count;
30+
};
31+
struct rob_info {
32+
std::map<int, int64_t> m_bandwidth, m_count;
33+
int64_t llcm_base, llcm_count, ins_count;
34+
};
35+
struct thread_info {
36+
rob_info rob;
37+
std::queue<int> llcm_type;
38+
std::queue<int> llcm_type_rob;
39+
};
2440
// Forward declarations
2541
class CXLController;
2642
class CXLEndPoint {
@@ -30,22 +46,22 @@ class CXLEndPoint {
3046
private:
3147
virtual void set_epoch(int epoch) = 0;
3248
virtual void free_stats(double size) = 0;
33-
virtual std::string output() = 0;
3449
virtual void delete_entry(uint64_t addr, uint64_t length) = 0;
35-
virtual double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
50+
virtual double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
3651
double dramlatency) = 0; // traverse the tree to calculate the latency
37-
virtual double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) = 0;
52+
virtual double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) = 0;
3853
virtual int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr,
3954
int index) = 0; // 0 not this endpoint, 1 store, 2 load, 3 prefetch
40-
virtual std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) = 0;
55+
virtual std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) = 0;
4156
};
4257

4358
class CXLMemExpander : public CXLEndPoint {
4459
public:
45-
EmuCXLBandwidth bandwidth;
46-
EmuCXLLatency latency;
60+
EmuCXLBandwidth bandwidth{};
61+
EmuCXLLatency latency{};
4762
uint64_t capacity;
48-
std::map<uint64_t, uint64_t> occupation; // timestamp, pa
63+
64+
std::vector<occupation_info> occupation; // one entry per record: timestamp, address (pa), access_count
4965
CXLMemExpanderEvent counter{};
5066
CXLMemExpanderEvent last_counter{};
5167

@@ -58,15 +74,14 @@ class CXLMemExpander : public CXLEndPoint {
5874
uint64_t last_timestamp = 0;
5975
int id = -1;
6076
CXLMemExpander(int read_bw, int write_bw, int read_lat, int write_lat, int id, int capacity);
61-
std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
77+
std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
6278
void set_epoch(int epoch) override;
6379
void free_stats(double size) override;
6480
int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
65-
double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
81+
double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
6682
double dramlatency) override; // traverse the tree to calculate the latency
67-
double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
83+
double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) override;
6884
void delete_entry(uint64_t addr, uint64_t length) override;
69-
std::string output() override;
7085
};
7186
class CXLSwitch : public CXLEndPoint {
7287
public:
@@ -76,18 +91,21 @@ class CXLSwitch : public CXLEndPoint {
7691
int id = -1;
7792
int epoch = 0;
7893
uint64_t last_timestamp = 0;
79-
// get the approximate congestion and target done time
94+
// TODO get the approximate congestion and target done time
8095
std::unordered_map<uint64_t, uint64_t> timeseries_map;
8196

82-
double congestion_latency = 0.02; // us
97+
double congestion_latency = 90; // latency of the switch; NOTE(review): comment previously said 200ns but the value is 90 - confirm value and unit
8398
explicit CXLSwitch(int id);
84-
std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
85-
double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
99+
std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
100+
double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
86101
double dramlatency) override; // traverse the tree to calculate the latency
87-
double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
102+
double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) override;
103+
double get_endpoint_rob_latency(CXLMemExpander* endpoint,
104+
const std::vector<std::tuple<uint64_t, uint64_t>>& accesses,
105+
const thread_info& t_info,
106+
double dramlatency);
88107
int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
89108
void delete_entry(uint64_t addr, uint64_t length) override;
90-
std::string output() override;
91109
virtual std::tuple<double, std::vector<uint64_t>> calculate_congestion();
92110
void set_epoch(int epoch) override;
93111
void free_stats(double size) override;

include/helper.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ enum {
3434
CPU_MDL_SKX = 85,
3535
CPU_MDL_SPR = 143,
3636
CPU_MDL_ADL = 151,
37+
CPU_MDL_GNR = 173,
38+
CPU_MDL_SRF = 175,
3739
CPU_MDL_LNL = 189,
3840
CPU_MDL_ARL = 198,
39-
CPU_MDL_SRF = 201,
4041
CPU_MDL_END = 0x0ffff
4142
};
4243
class Incore;

include/monitor.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class Monitors {
4444
void stop_all(int);
4545
void run_all(int);
4646
Monitor *get_mon(int, int);
47-
int enable(const uint32_t, const uint32_t, bool, uint64_t, const int32_t);
47+
int enable(uint32_t, uint32_t, bool, uint64_t, int32_t);
4848
void disable(uint32_t target);
4949
int terminate(uint32_t, uint32_t, int32_t);
5050
bool check_all_terminated(uint32_t);
@@ -66,9 +66,9 @@ class Monitor {
6666
double total_delay;
6767
timespec start_exec_ts, end_exec_ts;
6868
bool is_process;
69-
PEBS *pebs_ctx;
70-
LBR *lbr_ctx;
71-
BpfTimeRuntime *bpftime_ctx;
69+
PEBS *pebs_ctx{};
70+
LBR *lbr_ctx{};
71+
BpfTimeRuntime *bpftime_ctx{};
7272

7373
Monitor(const Monitor &other)
7474
: tgid(other.tgid), tid(other.tid), cpu_core(other.cpu_core), wanted_delay(other.wanted_delay),

microbench/cache-miss.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// 假设每个缓存行是64字节,LLC是8MB
77
#define ARRAY_SIZE (32 * 1024 * 1024) // 32MB
88
#define STRIDE 64 // 以缓存行大小作为步长
9-
#define ITERATIONS 1000000000
9+
#define ITERATIONS 10000000
1010

1111
int main() {
1212
// 分配大数组

src/bpftimeruntime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ int BpfTimeRuntime::read(CXLController *controller, BPFTimeRuntimeElem *elem) {
4545
for (int i = 6; i < 11; i++) {
4646
int key = 0;
4747
int key1 = 0;
48-
auto item1 = bpftime_map_get_next_key(i, &key1, &key); // process map
48+
bpftime_map_get_next_key(i, &key1, &key); // process map
4949
auto item2 = bpftime_map_lookup_elem(i, &key); // allocs map
5050
SPDLOG_DEBUG("Process map key: {} {} {}", key1, key, tid);
5151
if (i == 6 && item2 != nullptr) {

src/cxlcontroller.cpp

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -60,36 +60,14 @@ CXLController::CXLController(std::array<Policy *, 4> p, int capacity, page_type
6060
// differentiate R/W for multi-reader, multi-writer
6161
}
6262

63-
double CXLController::calculate_latency(const std::vector<std::tuple<int, int>> &elem, double dramlatency) {
63+
double CXLController::calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem, double dramlatency) {
6464
return CXLSwitch::calculate_latency(elem, dramlatency);
6565
}
6666

67-
double CXLController::calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) {
67+
double CXLController::calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) {
6868
return CXLSwitch::calculate_bandwidth(elem);
6969
}
7070

71-
std::string CXLController::output() {
72-
std::string res;
73-
if (!this->switches.empty()) {
74-
res += "(";
75-
res += this->switches[0]->output();
76-
for (size_t i = 1; i < this->switches.size(); ++i) {
77-
res += ",";
78-
res += this->switches[i]->output();
79-
}
80-
res += ")";
81-
}
82-
if (!this->expanders.empty()) {
83-
res += "(";
84-
res += this->expanders[0]->output();
85-
for (size_t i = 1; i < this->expanders.size(); ++i) {
86-
res += ",";
87-
res += this->expanders[i]->output();
88-
}
89-
res += ")";
90-
}
91-
return res;
92-
}
9371

9472
void CXLController::set_stats(mem_stats stats) {
9573
// SPDLOG_INFO("stats: {} {} {} {} {}", stats.total_allocated, stats.total_freed, stats.current_usage,
@@ -187,17 +165,37 @@ int CXLController::insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr,
187165
return res; // 返回实际的结果而不是固定的true
188166
}
189167
int CXLController::insert(uint64_t timestamp, uint64_t tid, lbr lbrs[32], cntr counters[32]) {
168+
// 处理LBR记录
190169
for (int i = 0; i < 32; i++) {
191170
if (!lbrs[i].from) {
192171
break;
193172
}
194173
insert_one(thread_map[tid], lbrs[i]);
195-
// TODO calculate delay
196-
// timestamp
197174
}
198-
auto all_access = get_access(timestamp); // get the current branch access?
199-
latency_lat += calculate_latency(all_access, dramlatency); // insert once
200-
bandwidth_lat += calculate_bandwidth(all_access); // insert once
175+
176+
auto all_access = get_access(timestamp);
177+
auto& t_info = thread_map[tid];
178+
179+
// 对每个endpoint计算延迟并累加
180+
double total_latency = 0.0;
181+
std::function<void(CXLSwitch*)> dfs_calculate = [&](CXLSwitch* node) {
182+
// 处理当前节点的expanders
183+
for (auto* expander : node->expanders) {
184+
total_latency += get_endpoint_rob_latency(expander, all_access, t_info, dramlatency);
185+
}
186+
187+
// 递归处理子节点
188+
for (auto* switch_ : node->switches) {
189+
dfs_calculate(switch_);
190+
}
191+
};
192+
193+
// 从当前controller开始DFS遍历
194+
dfs_calculate(this);
195+
196+
latency_lat += total_latency;
197+
bandwidth_lat += calculate_bandwidth(all_access);
198+
201199
return 0;
202200
}
203201
std::vector<std::string> CXLController::tokenize(const std::string_view &s) {
@@ -218,7 +216,7 @@ std::vector<std::string> CXLController::tokenize(const std::string_view &s) {
218216
}
219217
return res;
220218
}
221-
std::vector<std::tuple<int, int>> CXLController::get_access(uint64_t timestamp) { return CXLSwitch::get_access(timestamp); }
219+
std::vector<std::tuple<uint64_t, uint64_t>> CXLController::get_access(uint64_t timestamp) { return CXLSwitch::get_access(timestamp); }
222220
std::tuple<double, std::vector<uint64_t>> CXLController::calculate_congestion() {
223221
return CXLSwitch::calculate_congestion();
224222
}

0 commit comments

Comments
 (0)