SlugLab
diff --git a/include/cxlcontroller.h b/include/cxlcontroller.h
Lines changed: 7 additions & 17 deletions
diff --git a/include/cxlendpoint.h b/include/cxlendpoint.h
Lines changed: 35 additions & 17 deletions
diff --git a/include/helper.h b/include/helper.h
Lines changed: 2 additions & 1 deletion
diff --git a/include/monitor.h b/include/monitor.h
Lines changed: 4 additions & 4 deletions
diff --git a/microbench/cache-miss.c b/microbench/cache-miss.c
Lines changed: 1 addition & 1 deletion
diff --git a/src/bpftimeruntime.cpp b/src/bpftimeruntime.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/src/cxlcontroller.cpp b/src/cxlcontroller.cpp
Lines changed: 28 additions & 30 deletions
@@ -17,7 +17,6 @@
 #include <queue>
 #include <string_view>
 
-#define ROB_SIZE 512
 class Monitors;
 struct mem_stats;
 struct proc_info;
@@ -59,7 +58,7 @@ class PagingPolicy : public Policy {
     // paging related
 };
 
-class CachingPolicy: public Policy {
+class CachingPolicy : public Policy {
 public:
     CachingPolicy();
     // paging related
@@ -74,7 +73,7 @@ class CXLController : public CXLSwitch {
     PagingPolicy *paging_policy{};
     CachingPolicy *caching_policy{};
     CXLCounter counter;
-    std::map<uint64_t, uint64_t> occupation;
+    std::map<uint64_t, occupation_info> occupation;
     page_type page_type_; // percentage
     // no need for va pa map because v-indexed will not caught by us
     int num_switches = 0;
@@ -87,32 +86,23 @@ class CXLController : public CXLSwitch {
     // ring buffer
     std::queue<lbr> ring_buffer;
     // rob info
-    typedef struct {
-        std::map<int,int64_t> m_bandwidth, m_count;
-        int64_t llcm_base, llcm_count, ins_count;
-    } rob_info;
-    typedef struct {
-        rob_info rob;
-        std::queue<int> llcm_type;
-        std::queue<int> llcm_type_rob;
-    } thread_info;
     std::unordered_map<uint64_t, thread_info> thread_map;
 
-    explicit CXLController(std::array<Policy*,4> p, int capacity, page_type page_type_, int epoch, double dramlatency);
+    explicit CXLController(std::array<Policy *, 4> p, int capacity, page_type page_type_, int epoch,
+                           double dramlatency);
     void construct_topo(std::string_view newick_tree);
     void insert_end_point(CXLMemExpander *end_point);
     std::vector<std::string> tokenize(const std::string_view &s);
     std::tuple<double, std::vector<uint64_t>> calculate_congestion() override;
     void set_epoch(int epoch) override;
-    std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
-    double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
+    std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
+    double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
                              double dramlatency) override; // traverse the tree to calculate the latency
-    double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
+    double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) override;
     void insert_one(thread_info &t_info, lbr &lbr);
     int insert(uint64_t timestamp, uint64_t tid, lbr lbrs[32], cntr counters[32]);
     int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
     void delete_entry(uint64_t addr, uint64_t length) override;
-    std::string output() override;
     void set_stats(mem_stats stats);
     static void set_process_info(const proc_info &process_info);
     static void set_thread_info(const proc_info &thread_info);
 
@@ -15,12 +15,28 @@
 #include "cxlcounter.h"
 #include "helper.h"
 #include <list>
+#include <queue>
 #include <map>
 #include <string>
 #include <tuple>
 #include <unordered_map>
 #include <vector>
+#define ROB_SIZE 512
 
+struct occupation_info { // One occupancy record; every field defaults to zero.
+    uint64_t timestamp{}; // NOTE(review): presumably when the access was recorded - confirm the unit
+    uint64_t address{}; // NOTE(review): presumably the physical address of the access - confirm
+    uint64_t access_count{}; // NOTE(review): presumably how many accesses were seen for this address - confirm
+};
+struct rob_info { // Counters held in thread_info::rob; scalar members default to zero.
+    std::map<int, int64_t> m_bandwidth, m_count; // NOTE(review): meaning of the int key is not visible here - confirm
+    int64_t llcm_base{}, llcm_count{}, ins_count{}; // brace-initialised so a default-constructed rob_info never holds indeterminate values
+};
+struct thread_info { // Per-thread state; CXLController::thread_map stores one of these per tid.
+    rob_info rob; // counters for this thread (see rob_info)
+    std::queue<int> llcm_type; // NOTE(review): presumably queued LLC-miss type codes - confirm
+    std::queue<int> llcm_type_rob; // NOTE(review): presumably the miss types currently tracked in the ROB - confirm
+};
 // Forward declarations
 class CXLController;
 class CXLEndPoint {
@@ -30,22 +46,22 @@ class CXLEndPoint {
 private:
     virtual void set_epoch(int epoch) = 0;
     virtual void free_stats(double size) = 0;
-    virtual std::string output() = 0;
     virtual void delete_entry(uint64_t addr, uint64_t length) = 0;
-    virtual double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
+    virtual double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
                                      double dramlatency) = 0; // traverse the tree to calculate the latency
-    virtual double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) = 0;
+    virtual double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) = 0;
     virtual int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr,
                        int index) = 0; // 0 not this endpoint, 1 store, 2 load, 3 prefetch
-    virtual std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) = 0;
+    virtual std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) = 0;
 };
 
 class CXLMemExpander : public CXLEndPoint {
 public:
-    EmuCXLBandwidth bandwidth;
-    EmuCXLLatency latency;
+    EmuCXLBandwidth bandwidth{};
+    EmuCXLLatency latency{};
     uint64_t capacity;
-    std::map<uint64_t, uint64_t> occupation; // timestamp, pa
+
+    std::vector<occupation_info> occupation; // one occupation_info (timestamp, address, access_count) per entry
     CXLMemExpanderEvent counter{};
     CXLMemExpanderEvent last_counter{};
 
@@ -58,15 +74,14 @@ class CXLMemExpander : public CXLEndPoint {
     uint64_t last_timestamp = 0;
     int id = -1;
     CXLMemExpander(int read_bw, int write_bw, int read_lat, int write_lat, int id, int capacity);
-    std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
+    std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
     void set_epoch(int epoch) override;
     void free_stats(double size) override;
     int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
-    double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
+    double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
                              double dramlatency) override; // traverse the tree to calculate the latency
-    double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
+    double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) override;
     void delete_entry(uint64_t addr, uint64_t length) override;
-    std::string output() override;
 };
 class CXLSwitch : public CXLEndPoint {
 public:
@@ -76,18 +91,21 @@ class CXLSwitch : public CXLEndPoint {
     int id = -1;
     int epoch = 0;
     uint64_t last_timestamp = 0;
-    // get the approximate congestion and target done time
+    // TODO get the approximate congestion and target done time
     std::unordered_map<uint64_t, uint64_t> timeseries_map;
 
-    double congestion_latency = 0.02; // us
+    double congestion_latency = 90; // NOTE(review): comment said "200ns is the latency of the switch" but the value is 90 (previously 0.02 us) - confirm value and unit
     explicit CXLSwitch(int id);
-    std::vector<std::tuple<int, int>> get_access(uint64_t timestamp) override;
-    double calculate_latency(const std::vector<std::tuple<int, int>> &elem,
+    std::vector<std::tuple<uint64_t, uint64_t>> get_access(uint64_t timestamp) override;
+    double calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem,
                              double dramlatency) override; // traverse the tree to calculate the latency
-    double calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) override;
+    double calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) override;
+    double get_endpoint_rob_latency(CXLMemExpander* endpoint,
+                                  const std::vector<std::tuple<uint64_t, uint64_t>>& accesses,
+                                  const thread_info& t_info,
+                                  double dramlatency);
     int insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr, uint64_t virt_addr, int index) override;
     void delete_entry(uint64_t addr, uint64_t length) override;
-    std::string output() override;
     virtual std::tuple<double, std::vector<uint64_t>> calculate_congestion();
     void set_epoch(int epoch) override;
     void free_stats(double size) override;
 
@@ -34,9 +34,10 @@ enum {
     CPU_MDL_SKX = 85,
     CPU_MDL_SPR = 143,
     CPU_MDL_ADL = 151,
+    CPU_MDL_GNR = 173,
+    CPU_MDL_SRF = 175,
     CPU_MDL_LNL = 189,
     CPU_MDL_ARL = 198,
-    CPU_MDL_SRF = 201,
     CPU_MDL_END = 0x0ffff
 };
 class Incore;
 
@@ -44,7 +44,7 @@ class Monitors {
     void stop_all(int);
     void run_all(int);
     Monitor *get_mon(int, int);
-    int enable(const uint32_t, const uint32_t, bool, uint64_t, const int32_t);
+    int enable(uint32_t, uint32_t, bool, uint64_t, int32_t);
     void disable(uint32_t target);
     int terminate(uint32_t, uint32_t, int32_t);
     bool check_all_terminated(uint32_t);
@@ -66,9 +66,9 @@ class Monitor {
     double total_delay;
     timespec start_exec_ts, end_exec_ts;
     bool is_process;
-    PEBS *pebs_ctx;
-    LBR *lbr_ctx;
-    BpfTimeRuntime *bpftime_ctx;
+    PEBS *pebs_ctx{};
+    LBR *lbr_ctx{};
+    BpfTimeRuntime *bpftime_ctx{};
 
     Monitor(const Monitor &other)
         : tgid(other.tgid), tid(other.tid), cpu_core(other.cpu_core), wanted_delay(other.wanted_delay),
 
@@ -6,7 +6,7 @@
 // 假设每个缓存行是64字节，LLC是8MB
 #define ARRAY_SIZE (32 * 1024 * 1024)  // 32MB
 #define STRIDE 64  // 以缓存行大小作为步长
-#define ITERATIONS 1000000000
+#define ITERATIONS 10000000
 
 int main() {
     // 分配大数组
 
@@ -45,7 +45,7 @@ int BpfTimeRuntime::read(CXLController *controller, BPFTimeRuntimeElem *elem) {
     for (int i = 6; i < 11; i++) {
         int key = 0;
         int key1 = 0;
-        auto item1 = bpftime_map_get_next_key(i, &key1, &key); // process map
+        bpftime_map_get_next_key(i, &key1, &key); // process map
         auto item2 = bpftime_map_lookup_elem(i, &key); // allocs map
         SPDLOG_DEBUG("Process map key: {} {} {}", key1, key, tid);
         if (i == 6 && item2 != nullptr) {
 
@@ -60,36 +60,14 @@ CXLController::CXLController(std::array<Policy *, 4> p, int capacity, page_type
     // deferentiate R/W for multi reader multi writer
 }
 
-double CXLController::calculate_latency(const std::vector<std::tuple<int, int>> &elem, double dramlatency) {
+double CXLController::calculate_latency(const std::vector<std::tuple<uint64_t, uint64_t>> &elem, double dramlatency) {
     return CXLSwitch::calculate_latency(elem, dramlatency);
 }
 
-double CXLController::calculate_bandwidth(const std::vector<std::tuple<int, int>> &elem) {
+double CXLController::calculate_bandwidth(const std::vector<std::tuple<uint64_t, uint64_t>> &elem) {
     return CXLSwitch::calculate_bandwidth(elem);
 }
 
-std::string CXLController::output() {
-    std::string res;
-    if (!this->switches.empty()) {
-        res += "(";
-        res += this->switches[0]->output();
-        for (size_t i = 1; i < this->switches.size(); ++i) {
-            res += ",";
-            res += this->switches[i]->output();
-        }
-        res += ")";
-    }
-    if (!this->expanders.empty()) {
-        res += "(";
-        res += this->expanders[0]->output();
-        for (size_t i = 1; i < this->expanders.size(); ++i) {
-            res += ",";
-            res += this->expanders[i]->output();
-        }
-        res += ")";
-    }
-    return res;
-}
 
 void CXLController::set_stats(mem_stats stats) {
     // SPDLOG_INFO("stats: {} {} {} {} {}", stats.total_allocated, stats.total_freed, stats.current_usage,
@@ -187,17 +165,37 @@ int CXLController::insert(uint64_t timestamp, uint64_t tid, uint64_t phys_addr,
     return res; // 返回实际的结果而不是固定的true
 }
 int CXLController::insert(uint64_t timestamp, uint64_t tid, lbr lbrs[32], cntr counters[32]) {
+    // Process the LBR records: hand each one to insert_one for this thread, stopping at the first entry whose `from` is zero. NOTE(review): `counters` is not read in this overload.
     for (int i = 0; i < 32; i++) {
         if (!lbrs[i].from) {
             break;
         }
         insert_one(thread_map[tid], lbrs[i]);
-        // TODO calculate delay
-        // timestamp
     }
-    auto all_access = get_access(timestamp); // get the current branch access?
-    latency_lat += calculate_latency(all_access, dramlatency); // insert once
-    bandwidth_lat += calculate_bandwidth(all_access); // insert once
+
+    auto all_access = get_access(timestamp);
+    auto& t_info = thread_map[tid];
+
+    // Compute the latency for every endpoint and accumulate it.
+    double total_latency = 0.0;
+    std::function<void(CXLSwitch*)> dfs_calculate = [&](CXLSwitch* node) { // NOTE(review): std::function needs <functional>; confirm this file includes it
+        // Handle the expanders attached to the current node.
+        for (auto* expander : node->expanders) {
+            total_latency += get_endpoint_rob_latency(expander, all_access, t_info, dramlatency);
+        }
+
+        // Recurse into the child switches.
+        for (auto* switch_ : node->switches) {
+            dfs_calculate(switch_);
+        }
+    };
+
+    // Start the depth-first traversal from this controller.
+    dfs_calculate(this);
+
+    latency_lat += total_latency;
+    bandwidth_lat += calculate_bandwidth(all_access);
+
     return 0;
 }
 std::vector<std::string> CXLController::tokenize(const std::string_view &s) {
@@ -218,7 +216,7 @@ std::vector<std::string> CXLController::tokenize(const std::string_view &s) {
     }
     return res;
 }
-std::vector<std::tuple<int, int>> CXLController::get_access(uint64_t timestamp) { return CXLSwitch::get_access(timestamp); }
+std::vector<std::tuple<uint64_t, uint64_t>> CXLController::get_access(uint64_t timestamp) { return CXLSwitch::get_access(timestamp); }
 std::tuple<double, std::vector<uint64_t>> CXLController::calculate_congestion() {
     return CXLSwitch::calculate_congestion();
 }