Skip to content

Commit 74584bc

Browse files
committed
add new 2
1 parent 16c58b5 commit 74584bc

File tree

7 files changed

+84
-83
lines changed

7 files changed

+84
-83
lines changed

include/cxlcontroller.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@ class CXLController : public CXLSwitch {
8888
std::queue<lbr> ring_buffer;
8989
// rob info
9090
typedef struct {
91-
std::vector<uint64_t> m_bandwidth, m_count;
92-
uint64_t llcm_base, llcm_count, ins_count;
91+
std::map<int,int64_t> m_bandwidth, m_count;
92+
int64_t llcm_base, llcm_count, ins_count;
9393
} rob_info;
9494
typedef struct {
9595
rob_info rob;
@@ -104,10 +104,10 @@ class CXLController : public CXLSwitch {
104104
std::vector<std::string> tokenize(const std::string_view &s);
105105
std::tuple<double, std::vector<uint64_t>> calculate_congestion() override;
106106
void set_epoch(int epoch) override;
107-
std::tuple<int, int> get_access(uint64_t timestamp) override;
108-
double calculate_latency(const std::tuple<int, int> &elem,
107+
std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
108+
double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
109109
double dramlatency) override; // traverse the tree to calculate the latency
110-
double calculate_bandwidth(const std::tuple<int, int> &elem) override;
110+
double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
111111
void insert_one(thread_info &t_info, lbr &lbr);
112112
int insert(uint64_t timestamp, uint64_t tid, lbr lbrs[32], cntr counters[32]);
113113
int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;

include/cxlendpoint.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,12 @@ class CXLEndPoint {
3232
virtual void free_stats(double size) = 0;
3333
virtual std::string output() = 0;
3434
virtual void delete_entry(uint64_t addr, uint64_t length) = 0;
35-
virtual double calculate_latency(const std::tuple<int, int> &elem, double dramlatency) = 0; // traverse the tree to calculate the latency
36-
virtual double calculate_bandwidth(const std::tuple<int, int> &elem) = 0;
35+
virtual double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
36+
double dramlatency) = 0; // traverse the tree to calculate the latency
37+
virtual double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) = 0;
3738
virtual int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr,
3839
int index) = 0; // 0 not this endpoint, 1 store, 2 load, 3 prefetch
39-
virtual std::tuple<int, int> get_access(uint64_t timestamp) = 0;
40+
virtual std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) = 0;
4041
};
4142

4243
class CXLMemExpander : public CXLEndPoint {
@@ -45,7 +46,6 @@ class CXLMemExpander : public CXLEndPoint {
4546
EmuCXLLatency latency;
4647
uint64_t capacity;
4748
std::map<uint64_t, uint64_t> occupation; // timestamp, pa
48-
std::map<uint64_t, uint64_t> va_pa_map; // va, pa
4949
CXLMemExpanderEvent counter{};
5050
CXLMemExpanderEvent last_counter{};
5151

@@ -58,12 +58,13 @@ class CXLMemExpander : public CXLEndPoint {
5858
uint64_t last_timestamp = 0;
5959
int id = -1;
6060
CXLMemExpander(int read_bw, int write_bw, int read_lat, int write_lat, int id, int capacity);
61-
std::tuple<int, int> get_access(uint64_t timestamp) override;
61+
std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
6262
void set_epoch(int epoch) override;
6363
void free_stats(double size) override;
6464
int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
65-
double calculate_latency(const std::tuple<int, int> &elem, double dramlatency) override; // traverse the tree to calculate the latency
66-
double calculate_bandwidth(const std::tuple<int, int> &elem) override;
65+
double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
66+
double dramlatency) override; // traverse the tree to calculate the latency
67+
double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
6768
void delete_entry(uint64_t addr, uint64_t length) override;
6869
std::string output() override;
6970
};
@@ -80,9 +81,10 @@ class CXLSwitch : public CXLEndPoint {
8081

8182
double congestion_latency = 0.02; // us
8283
explicit CXLSwitch(int id);
83-
std::tuple<int, int> get_access(uint64_t timestamp) override;
84-
double calculate_latency(const std::tuple<int, int> &elem, double dramlatency) override; // traverse the tree to calculate the latency
85-
double calculate_bandwidth(const std::tuple<int, int> &elem) override;
84+
std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
85+
double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
86+
double dramlatency) override; // traverse the tree to calculate the latency
87+
double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
8688
int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
8789
void delete_entry(uint64_t addr, uint64_t length) override;
8890
std::string output() override;

include/lbr.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
#define CXLMEMSIM_LBR_H
1414

1515
// 2 bits
16-
#define LBR_DATA_MASK 0
17-
#define LBR_DATA_SHIFT 1
16+
#define LBR_DATA_MASK 0x0000000000000003
17+
#define LBR_DATA_SHIFT 0
1818
// 8 bits
19-
#define LBR_INS_MASK 0
20-
#define LBR_INS_SHIFT 0
19+
#define LBR_INS_MASK 0xfffffff0000
20+
#define LBR_INS_SHIFT 16
2121

2222
#include "cxlcontroller.h"
2323
#include "helper.h"

src/cxlcontroller.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ CXLController::CXLController(std::array<Policy *, 4> p, int capacity, page_type
6060
// differentiate R/W for multi-reader multi-writer
6161
}
6262

63-
double CXLController::calculate_latency(const std::tuple<int, int> &elem, double dramlatency) {
63+
double CXLController::calculate_latency(const std::vector<std::tuple<int, int>> &elem, double dramlatency) {
6464
return CXLSwitch::calculate_latency(elem, dramlatency);
6565
}
6666

67-
double CXLController::calculate_bandwidth(const std::tuple<int, int> &elem) {
67+
double CXLController::calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) {
6868
return CXLSwitch::calculate_bandwidth(elem);
6969
}
7070

@@ -118,19 +118,25 @@ void CXLController::delete_entry(uint64_t addr, uint64_t length) { CXLSwitch::de
118118

119119
void CXLController::insert_one(thread_info &t_info, lbr &lbr) {
120120
auto &rob = t_info.rob;
121-
auto llcm_count = lbr.flags & LBR_DATA_MASK >> LBR_DATA_SHIFT;
122-
auto ins_count = lbr.flags & LBR_INS_MASK >> LBR_INS_SHIFT;
121+
auto llcm_count = (lbr.flags & LBR_DATA_MASK) >> LBR_DATA_SHIFT;
122+
auto ins_count = (lbr.flags & LBR_INS_MASK) >> LBR_INS_SHIFT;
123+
124+
// Push onto ring_buffer here to record that a new lbr has been received
125+
ring_buffer.push(lbr);
126+
123127
for (int i = 0; i < llcm_count; i++) {
124128
rob.m_count[t_info.llcm_type.front()]++;
125129
t_info.llcm_type_rob.push(t_info.llcm_type.front());
126130
t_info.llcm_type.pop();
127131
}
128132
rob.llcm_count += llcm_count;
129133
rob.ins_count += ins_count;
134+
130135
while (rob.ins_count > ROB_SIZE) {
131-
auto lbr = ring_buffer.front();
132-
llcm_count = (lbr.flags & LBR_DATA_MASK) >> LBR_DATA_SHIFT;
133-
ins_count = (lbr.flags & LBR_INS_MASK) >> LBR_INS_SHIFT;
136+
auto old_lbr = ring_buffer.front();
137+
llcm_count = (old_lbr.flags & LBR_DATA_MASK) >> LBR_DATA_SHIFT;
138+
ins_count = (old_lbr.flags & LBR_INS_MASK) >> LBR_INS_SHIFT;
139+
134140
rob.ins_count -= ins_count;
135141
rob.llcm_count -= llcm_count;
136142
rob.llcm_base += llcm_count;
@@ -212,9 +218,7 @@ std::vector<std::string> CXLController::tokenize(const std::string_view &s) {
212218
}
213219
return res;
214220
}
215-
std::tuple<int, int> CXLController::get_access(uint64_t timestamp) {
216-
return CXLSwitch::get_access(timestamp);
217-
}
221+
std::vector<std::tuple<int, int>> CXLController::get_access(uint64_t timestamp) { return CXLSwitch::get_access(timestamp); }
218222
std::tuple<double, std::vector<uint64_t>> CXLController::calculate_congestion() {
219223
return CXLSwitch::calculate_congestion();
220224
}

src/cxlendpoint.cpp

Lines changed: 37 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,29 +19,24 @@ CXLMemExpander::CXLMemExpander(int read_bw, int write_bw, int read_lat, int writ
1919
this->latency.read = read_lat;
2020
this->latency.write = write_lat;
2121
}
22-
double CXLMemExpander::calculate_latency(const std::tuple<int, int> &elem, double dramlatency) {
22+
double CXLMemExpander::calculate_latency(const std::vector<std::tuple<int, int>> &elem, double dramlatency) {
2323

2424
return 60;
2525
}
26-
double CXLMemExpander::calculate_bandwidth(const std::tuple<int, int> &elem) {
26+
double CXLMemExpander::calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) {
2727
// Iterate the map within the last 20ms
2828

2929
return this->bandwidth.read + this->bandwidth.write;
3030
}
3131
void CXLMemExpander::delete_entry(uint64_t addr, uint64_t length) {
32-
for (auto it1 = va_pa_map.begin(); it1 != va_pa_map.end();) {
33-
if (it1->second >= addr && it1->second <= addr + length) {
34-
for (auto it = occupation.begin(); it != occupation.end();) {
35-
if (it->second == addr) {
36-
it = occupation.erase(it);
37-
} else {
38-
++it;
39-
}
40-
}
41-
it1 = va_pa_map.erase(it1);
42-
this->counter.inc_load();
32+
for (auto it = occupation.begin(); it != occupation.end();) {
33+
if (it->second == addr) {
34+
it = occupation.erase(it);
35+
} else {
36+
++it;
4337
}
4438
}
39+
this->counter.inc_load();
4540
// kernel mode access
4641
for (auto it = occupation.begin(); it != occupation.end();) {
4742
if (it->second >= addr && it->second <= addr + length) {
@@ -61,12 +56,6 @@ int CXLMemExpander::insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr,
6156
last_timestamp = last_timestamp > timestamp ? last_timestamp : timestamp; // Update the last timestamp
6257
// Check if the address is already in the map
6358
if (phys_addr != 0) {
64-
if (va_pa_map.find(virt_addr) == va_pa_map.end()) {
65-
this->va_pa_map.emplace(virt_addr, phys_addr);
66-
} else {
67-
this->va_pa_map[virt_addr] = phys_addr;
68-
SPDLOG_DEBUG("virt:{} phys:{} conflict insertion detected\n", virt_addr, phys_addr);
69-
}
7059
for (auto it = this->occupation.cbegin(); it != this->occupation.cend(); it++) {
7160
if ((*it).second == phys_addr) {
7261
this->occupation.erase(it);
@@ -95,24 +84,30 @@ int CXLMemExpander::insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr,
9584
return 0;
9685
}
9786
std::string CXLMemExpander::output() { return std::format("CXLMemExpander {}", this->id); }
98-
std::tuple<int, int> CXLMemExpander::get_access(uint64_t timestamp) {
99-
this->last_read = this->counter.load - this->last_counter.load;
100-
this->last_write = this->counter.store - this->last_counter.store;
87+
std::vector<std::tuple<int, int>> CXLMemExpander::get_access(uint64_t timestamp) {
10188
last_counter = CXLMemExpanderEvent(counter);
102-
return std::make_tuple(this->last_read, this->last_write);
89+
std::vector<std::tuple<int, int>> res;
90+
// Iterate the map within the last 100ns
91+
for (auto it = occupation.begin(); it != occupation.end();) {
92+
if (it->first > timestamp - 100) {
93+
res.push_back(std::make_tuple(it->first, it->second));
94+
} else {
95+
++it;
96+
}
97+
}
98+
return res;
10399
}
104100
void CXLMemExpander::set_epoch(int epoch) { this->epoch = epoch; }
105101
void CXLMemExpander::free_stats(double size) {
106-
std::vector<uint64_t> keys;
107-
for (auto &it : this->va_pa_map) {
108-
keys.push_back(it.first);
109-
}
110-
std::shuffle(keys.begin(), keys.end(), std::mt19937(std::random_device()()));
111-
for (auto it = keys.begin(); it != keys.end(); ++it) {
112-
if (this->va_pa_map[*it] > size) {
113-
this->va_pa_map.erase(*it);
114-
this->occupation.erase(*it);
115-
this->counter.inc_load();
102+
// Randomly delete entries
103+
std::random_device rd;
104+
std::mt19937 gen(rd());
105+
std::uniform_int_distribution<> dis(0, 1);
106+
for (auto it = occupation.begin(); it != occupation.end();) {
107+
if (dis(gen) == 1) {
108+
it = occupation.erase(it);
109+
} else {
110+
++it;
116111
}
117112
}
118113
}
@@ -147,7 +142,7 @@ void CXLSwitch::delete_entry(uint64_t addr, uint64_t length) {
147142
}
148143
}
149144
CXLSwitch::CXLSwitch(int id) : id(id) {}
150-
double CXLSwitch::calculate_latency(const std::tuple<int, int> &elem, double dramlatency) {
145+
double CXLSwitch::calculate_latency(const std::vector<std::tuple<int, int>> &elem, double dramlatency) {
151146
double lat = 0.0;
152147
for (auto &expander : this->expanders) {
153148
lat += expander->calculate_latency(elem, dramlatency);
@@ -157,7 +152,7 @@ double CXLSwitch::calculate_latency(const std::tuple<int, int> &elem, double dra
157152
}
158153
return lat;
159154
}
160-
double CXLSwitch::calculate_bandwidth(const std::tuple<int, int> &elem) {
155+
double CXLSwitch::calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) {
161156
double bw = 0.0;
162157
for (auto &expander : this->expanders) {
163158
bw += expander->calculate_bandwidth(elem);
@@ -197,19 +192,17 @@ std::tuple<double, std::vector<uint64_t>> CXLSwitch::calculate_congestion() {
197192
}
198193
return std::make_tuple(latency, congestion);
199194
}
200-
std::tuple<int, int> CXLSwitch::get_access(uint64_t timestamp) {
201-
int read = 0, write = 0;
195+
std::vector<std::tuple<int, int>> CXLSwitch::get_access(uint64_t timestamp) {
196+
auto res = std::vector<std::tuple<int, int>>();
202197
for (auto &expander : this->expanders) {
203-
auto [r, w] = expander->get_access(timestamp);
204-
read += r;
205-
write += w;
198+
auto tmp = expander->get_access(timestamp);
199+
res.insert(res.end(), tmp.begin(), tmp.end());
206200
}
207201
for (auto &switch_ : this->switches) {
208-
auto [r, w] = switch_->get_access(timestamp);
209-
read += r;
210-
write += w;
202+
auto tmp = switch_->get_access(timestamp);
203+
res.insert(res.end(), tmp.begin(), tmp.end());
211204
}
212-
return std::make_tuple(read, write);
205+
return res;
213206
}
214207
void CXLSwitch::set_epoch(int epoch) { this->epoch = epoch; }
215208
void CXLSwitch::free_stats(double size) {

src/lbr.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,30 +139,32 @@ int LBR::read(CXLController *controller, LBRElem *elem) {
139139
// printf("read lbr\n");
140140
switch (header->type) {
141141
case PERF_RECORD_LOST:
142-
SPDLOG_DEBUG("received PERF_RECORD_LOST\n");
142+
SPDLOG_DEBUG("received PERF_RECORD_LOST");
143143
break;
144144
case PERF_RECORD_SAMPLE:
145145
data = reinterpret_cast<lbr_sample *>(dp + this->rdlen % DATA_SIZE);
146146

147147
if (header->size < sizeof(*data)) {
148-
SPDLOG_DEBUG("size too small. size:{}\n", header->size);
148+
SPDLOG_DEBUG("size too small. size:{}", header->size);
149149
r = -1;
150150
return r;
151151
}
152152
if (header->size > sizeof(*data)) {
153-
SPDLOG_DEBUG("size too big. size:{} / {}\n", header->size, sizeof(*data));
153+
SPDLOG_DEBUG("size too big. size:{} / {}", header->size, sizeof(*data));
154154
}
155155
if (this->pid == data->pid) {
156156
SPDLOG_ERROR("pid:{} tid:{} size:{} nr2:{} data-size:{} cpu:{} timestamp:{} hw_idx: lbrs:{} "
157-
"counters:{} {} {}\n",
157+
"counters:{} {} {}",
158158
data->pid, data->tid, header->size, /*data->nr,*/ data->nr2, sizeof(*data),
159159
/*data->ips[0],*/ data->cpu, data->timestamp, /* data->hw_idx,*/ data->lbrs[0].from,
160160
data->counters[0].counters, data->counters[1].counters, data->counters[2].counters);
161-
controller->insert(data->timestamp, data->tid, data->lbrs, data->counters);
162-
elem->tid = data->tid;
161+
163162
memcpy(&elem->branch_stack,
164163
(char *)&data->counters + (32 * 8), // Cast to char* before arithmetic
165164
92 * 8);
165+
controller->insert(data->timestamp, data->tid, data->lbrs, data->counters);
166+
elem->tid = data->tid;
167+
166168
elem->total++;
167169
r = 1;
168170
}

src/main.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,14 +252,14 @@ int main(int argc, char *argv[]) {
252252
if (mon.bpftime_ctx->read(controller, &mon.after->bpftime) < 0) {
253253
SPDLOG_ERROR("[{}:{}:{}] Warning: Failed BPFTIMERUNTIME read", i, mon.tgid, mon.tid);
254254
}
255-
/* read LBR sample */
256-
if (mon.lbr_ctx->read(controller, &mon.after->lbr) < 0) {
257-
SPDLOG_ERROR("[{}:{}:{}] Warning: Failed LBR read", i, mon.tgid, mon.tid);
258-
}
259255
/* read PEBS sample */
260256
if (mon.pebs_ctx->read(controller, &mon.after->pebs) < 0) {
261257
SPDLOG_ERROR("[{}:{}:{}] Warning: Failed PEBS read", i, mon.tgid, mon.tid);
262258
}
259+
/* read LBR sample */
260+
if (mon.lbr_ctx->read(controller, &mon.after->lbr) < 0) {
261+
SPDLOG_ERROR("[{}:{}:{}] Warning: Failed LBR read", i, mon.tgid, mon.tid);
262+
}
263263
target_llcmiss = mon.after->pebs.total - mon.before->pebs.total;
264264

265265
for (int j = 0; j < helper.used_cpu.size(); j++) {

0 commit comments

Comments
 (0)