
Commit fc65072

Add journal articles from 2020 to 2023
1 parent 313c539 commit fc65072

14 files changed: +480 -0 lines changed

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
@article{DBLP:journals/corr/abs-2011-09501,
  author     = {Yixin Guo and
                Pengcheng Li and
                Yingwei Luo and
                Xiaolin Wang and
                Zhenlin Wang},
  title      = {{GRAPHSPY:} Fused Program Semantic-Level Embedding via Graph Neural
                Networks for Dead Store Detection},
  journal    = {CoRR},
  volume     = {abs/2011.09501},
  year       = {2020},
  url        = {https://arxiv.org/abs/2011.09501},
  eprinttype = {arXiv},
  eprint     = {2011.09501},
  timestamp  = {Mon, 22 Jul 2024 08:26:51 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2011-09501.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
1+
---
2+
title: "GRAPHSPY: Fused Program Semantic-Level Embedding via Graph Neural Networks for Dead Store Detection"
3+
4+
authors:
5+
- Yixin Guo
6+
- Pengcheng Li
7+
- Yingwei Luo
8+
- Xiaolin Wang
9+
- Zhenlin Wang
10+
11+
date: "2020-11-01T00:00:00Z"
12+
doi: ""
13+
14+
# Schedule page publish date (NOT publication's date).
15+
publishDate: "2020-11-01T00:00:00Z"
16+
17+
# Publication type.
18+
# Accepts a single type but formatted as a YAML list (for Hugo requirements).
19+
# Enter a publication type from the CSL standard.
20+
publication_types: ["article-journal"]
21+
22+
# Publication name and optional abbreviated publication name.
23+
publication: "*Computing Research Repository*"
24+
publication_short: ""
25+
26+
abstract: 'Production software oftentimes suffers from the issue of performance inefficiencies caused by inappropriate use of data structures, programming abstractions, and conservative compiler optimizations. It is desirable to avoid unnecessary memory operations. However, existing works often use a whole-program fine-grained monitoring method with incredibly high overhead. To this end, we propose a learning-aided approach to identify unnecessary memory operations intelligently with low overhead. By applying several prevalent graph neural network models to extract program semantics with respect to program structure, execution order and dynamic states, we present a novel, hybrid program embedding approach so that to derive unnecessary memory operations through the embedding. We train our model with tens of thousands of samples acquired from a set of real-world benchmarks. Results show that our model achieves 90% of accuracy and incurs only around a half of time overhead of the state-of-art tool.'
27+
28+
# Summary. An optional shortened abstract.
29+
summary: ''
30+
31+
tags: []
32+
33+
# Display this page in the Featured widget?
34+
featured: true
35+
36+
# Custom links (uncomment lines below)
37+
# links:
38+
# - name: Custom Link
39+
# url: http://example.org
40+
41+
url_pdf: 'https://arxiv.org/pdf/2011.09501'
42+
url_code: ''
43+
url_dataset: ''
44+
url_poster: ''
45+
url_project: ''
46+
url_slides: ''
47+
url_source: ''
48+
url_video: ''
49+
---
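As a rough, hypothetical illustration of the embedding idea this abstract describes (not the authors' GRAPHSPY code), the sketch below runs two rounds of mean-aggregation message passing over a tiny made-up program graph; the graph, node features, and weights are all stand-ins.

```python
import numpy as np

# Hypothetical program graph: nodes are instructions, edges follow
# control/data flow. Real node features would encode opcode, operands,
# and dynamic state; here they are random stand-ins.
rng = np.random.default_rng(0)
num_nodes, feat_dim = 6, 8
features = rng.normal(size=(num_nodes, feat_dim))
edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (1, 4)]  # made-up flow edges

# Adjacency with self-loops, row-normalized (GCN-style mean aggregation).
adj = np.eye(num_nodes)
for u, v in edges:
    adj[u, v] = adj[v, u] = 1.0
adj /= adj.sum(axis=1, keepdims=True)

# Two rounds of message passing with random (untrained) weights.
w1 = rng.normal(size=(feat_dim, feat_dim))
w2 = rng.normal(size=(feat_dim, feat_dim))
h = np.tanh(adj @ features @ w1)
h = np.tanh(adj @ h @ w2)

# A trained model would feed store-node embeddings to a classifier that
# predicts "dead store" vs. "useful store"; here we just print one embedding.
print(h[0])
```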
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
@article{DBLP:journals/jcst/ShaHLWW20,
  author    = {Sai Sha and
               Jingyuan Hu and
               Yingwei Luo and
               Xiaolin Wang and
               Zhenlin Wang},
  title     = {Huge Page Friendly Virtualized Memory Management},
  journal   = {J. Comput. Sci. Technol.},
  volume    = {35},
  number    = {2},
  pages     = {433--452},
  year      = {2020},
  url       = {https://doi.org/10.1007/s11390-020-9693-0},
  doi       = {10.1007/S11390-020-9693-0},
  timestamp = {Tue, 21 Mar 2023 21:05:42 +0100},
  biburl    = {https://dblp.org/rec/journals/jcst/ShaHLWW20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
1+
---
2+
title: "Huge Page Friendly Virtualized Memory Management"
3+
4+
authors:
5+
- Sai Sha
6+
- Jingyuan Hu
7+
- Yingwei Luo
8+
- Xiaolin Wang
9+
- Zhenlin Wang
10+
11+
date: "2020-01-01T00:00:00Z"
12+
doi: ""
13+
14+
# Schedule page publish date (NOT publication's date).
15+
publishDate: "2020-01-01T00:00:00Z"
16+
17+
# Publication type.
18+
# Accepts a single type but formatted as a YAML list (for Hugo requirements).
19+
# Enter a publication type from the CSL standard.
20+
publication_types: ["article-journal"]
21+
22+
# Publication name and optional abbreviated publication name.
23+
publication: "*Journal of Computer Science and Technology*"
24+
publication_short: ""
25+
26+
abstract: 'With the rapid increase of memory consumption by applications running on cloud data centers, we need more efficient memory management in a virtualized environment. Exploiting huge pages becomes more critical for a virtual machine’s performance when it runs large working set size programs. Programs with large working set sizes are more sensitive to memory allocation, which requires us to quickly adjust the virtual machine’s memory to accommodate memory phase changes. It would be much more efficient if we could adjust virtual machines’ memory at the granularity of huge pages. However, existing virtual machine memory reallocation techniques, such as ballooning, do not support huge pages. In addition, in order to drive effective memory reallocation, we need to predict the actual memory demand of a virtual machine. We find that traditional memory demand estimation methods designed for regular pages cannot be simply ported to a system adopting huge pages. How to adjust the memory of virtual machines timely and effectively according to the periodic change of memory demand is another challenge we face. This paper proposes a dynamic huge page based memory balancing system (HPMBS) for efficient memory management in a virtualized environment. We first rebuild the ballooning mechanism in order to dispatch memory in the granularity of huge pages. We then design and implement a huge page working set size estimation mechanism which can accurately estimate a virtual machine’s memory demand in huge pages environments. Combining these two mechanisms, we finally use an algorithm based on dynamic programming to achieve dynamic memory balancing. Experiments show that our system saves memory and improves overall system performance with low overhead.'
27+
28+
# Summary. An optional shortened abstract.
29+
summary: ''
30+
31+
tags: []
32+
33+
# Display this page in the Featured widget?
34+
featured: true
35+
36+
# Custom links (uncomment lines below)
37+
# links:
38+
# - name: Custom Link
39+
# url: http://example.org
40+
41+
url_pdf: 'https://link.springer.com/article/10.1007/s11390-020-9693-0'
42+
url_code: ''
43+
url_dataset: ''
44+
url_poster: ''
45+
url_project: ''
46+
url_slides: ''
47+
url_source: ''
48+
url_video: ''
49+
---
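A toy sketch of one ingredient the abstract mentions: estimating a virtual machine's working set size at 2 MiB huge-page granularity from a sequence of touched guest-physical addresses. The synthetic trace and the windowed unique-page count are simplifications for illustration, not HPMBS's actual estimator.

```python
HUGE_PAGE_SHIFT = 21  # 2 MiB huge pages

def wss_in_huge_pages(access_trace, window):
    """Count distinct 2 MiB-aligned pages touched in each window of accesses."""
    estimates = []
    touched = set()
    for i, gpa in enumerate(access_trace, start=1):
        touched.add(gpa >> HUGE_PAGE_SHIFT)
        if i % window == 0:
            estimates.append(len(touched))
            touched.clear()
    return estimates

# Example: a synthetic trace cycling over five hot huge pages.
trace = [0x200000 * (i % 5) + (i * 64) % 0x200000 for i in range(10_000)]
print(wss_in_huge_pages(trace, window=2_000))  # [5, 5, 5, 5, 5]
```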
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
@article{DBLP:journals/tos/PanWLW21,
  author    = {Cheng Pan and
               Xiaolin Wang and
               Yingwei Luo and
               Zhenlin Wang},
  title     = {Penalty- and Locality-aware Memory Allocation in Redis Using Enhanced
               {AET}},
  journal   = {{ACM} Trans. Storage},
  volume    = {17},
  number    = {2},
  pages     = {15:1--15:45},
  year      = {2021},
  url       = {https://doi.org/10.1145/3447573},
  doi       = {10.1145/3447573},
  timestamp = {Sun, 19 Jan 2025 14:52:13 +0100},
  biburl    = {https://dblp.org/rec/journals/tos/PanWLW21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
1+
---
2+
title: "Penalty- and Locality-aware Memory Allocation in Redis Using Enhanced AET"
3+
4+
authors:
5+
- Cheng Pan
6+
- Xiaolin Wang
7+
- Yingwei Luo
8+
- Zhenlin Wang
9+
10+
date: "2021-02-01T00:00:00Z"
11+
doi: ""
12+
13+
# Schedule page publish date (NOT publication's date).
14+
publishDate: "2021-02-01T00:00:00Z"
15+
16+
# Publication type.
17+
# Accepts a single type but formatted as a YAML list (for Hugo requirements).
18+
# Enter a publication type from the CSL standard.
19+
publication_types: ["article-journal"]
20+
21+
# Publication name and optional abbreviated publication name.
22+
publication: "*ACM Transactions on Storage*"
23+
publication_short: ""
24+
25+
abstract: 'Due to large data volume and low latency requirements of modern web services, the use of an in-memory key-value (KV) cache often becomes an inevitable choice (e.g., Redis and Memcached). The in-memory cache holds hot data, reduces request latency, and alleviates the load on background databases. Inheriting from the traditional hardware cache design, many existing KV cache systems still use recency-based cache replacement algorithms, e.g., least recently used or its approximations. However, the diversity of miss penalty distinguishes a KV cache from a hardware cache. Inadequate consideration of penalty can substantially compromise space utilization and request service time. KV accesses also demonstrate locality, which needs to be coordinated with miss penalty to guide cache management.
26+
In this article, we first discuss how to enhance the existing cache model, the Average Eviction Time model, so that it can adapt to modeling a KV cache. After that, we apply the model to Redis and propose pRedis, Penalty- and Locality-aware Memory Allocation in Redis, which synthesizes data locality and miss penalty, in a quantitative manner, to guide memory allocation and replacement in Redis. At the same time, we also explore the diurnal behavior of a KV store and exploit long-term reuse. We replace the original passive eviction mechanism with an automatic dump/load mechanism, to smooth the transition between access peaks and valleys. Our evaluation shows that pRedis effectively reduces the average and tail access latency with minimal time and space overhead. For both real-world and synthetic workloads, our approach delivers an average of 14.0%∼52.3% latency reduction over a state-of-the-art penalty-aware cache management scheme, Hyperbolic Caching (HC), and shows more quantitative predictability of performance. Moreover, we can obtain even lower average latency (1.1%∼5.5%) when dynamically switching policies between pRedis and HC.'
27+
28+
# Summary. An optional shortened abstract.
29+
summary: ''
30+
31+
tags: []
32+
33+
# Display this page in the Featured widget?
34+
featured: true
35+
36+
# Custom links (uncomment lines below)
37+
# links:
38+
# - name: Custom Link
39+
# url: http://example.org
40+
41+
url_pdf: 'https://dl.acm.org/doi/pdf/10.1145/3447573'
42+
url_code: ''
43+
url_dataset: ''
44+
url_poster: ''
45+
url_project: ''
46+
url_slides: ''
47+
url_source: ''
48+
url_video: ''
49+
---
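For context, a minimal sketch of the base Average Eviction Time (AET) construction that the abstract says pRedis extends: P(t) is the fraction of references with reuse time greater than t, the eviction time for cache size c is the first T at which the running sum of P(t) reaches c, and the predicted miss ratio is P(T). The penalty- and locality-aware parts of pRedis are not reproduced here.

```python
from collections import Counter

def miss_ratio_curve(reuse_times, max_cache_size):
    """Sketch of an AET-style miss ratio curve built from reuse times.

    reuse_times: one entry per reference; None marks a cold (first-ever)
    access, treated as an infinite reuse time.
    """
    n = len(reuse_times)
    hist = Counter(t for t in reuse_times if t is not None)
    cold = sum(1 for t in reuse_times if t is None)

    curve = {}
    greater = n      # references whose reuse time is > current t
    filled = 0.0     # running sum of P(t): expected cache fill
    size, t = 1, 0
    while size <= max_cache_size:
        p = greater / n          # P(t)
        if filled >= size:       # t is the eviction time for this cache size
            curve[size] = p      # predicted miss ratio
            size += 1
            continue
        if greater == 0:
            break
        filled += p
        t += 1
        greater -= hist.get(t, 0)
    for s in range(size, max_cache_size + 1):
        curve[s] = cold / n      # cache already holds the whole footprint
    return curve

# Cyclic accesses over 4 keys: misses dominate until the cache holds all 4.
refs = [None] * 4 + [4] * 16
print(miss_ratio_curve(refs, max_cache_size=4))
# -> {1: 1.0, 2: 1.0, 3: 1.0, 4: 0.2}
```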
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
@article{DBLP:journals/corr/abs-2209-13111,
  author     = {Sai Sha and
                Chuandong Li and
                Yingwei Luo and
                Xiaolin Wang and
                Zhenlin Wang},
  title      = {{HMM-V:} Heterogeneous Memory Management for Virtualization},
  journal    = {CoRR},
  volume     = {abs/2209.13111},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.13111},
  doi        = {10.48550/ARXIV.2209.13111},
  eprinttype = {arXiv},
  eprint     = {2209.13111},
  timestamp  = {Wed, 04 Oct 2023 19:04:03 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-13111.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
1+
---
2+
title: "HMM-V: Heterogeneous Memory Management for Virtualization"
3+
4+
authors:
5+
- Sai Sha
6+
- Chuandong Li
7+
- Yingwei Luo
8+
- Xiaolin Wang
9+
- Zhenlin Wang
10+
11+
date: "2022-09-01T00:00:00Z"
12+
doi: ""
13+
14+
# Schedule page publish date (NOT publication's date).
15+
publishDate: "2022-09-01T00:00:00Z"
16+
17+
# Publication type.
18+
# Accepts a single type but formatted as a YAML list (for Hugo requirements).
19+
# Enter a publication type from the CSL standard.
20+
publication_types: ["article-journal"]
21+
22+
# Publication name and optional abbreviated publication name.
23+
publication: "*Computing Research Repository*"
24+
publication_short: ""
25+
26+
abstract: 'The memory demand of virtual machines (VMs) is increasing, while DRAM has limited capacity and high power consumption. Non-volatile memory (NVM) is an alternative to DRAM, but it has high latency and low bandwidth. We observe that the VM with heterogeneous memory may incur up to a 1.5× slowdown compared to a DRAM VM, if not managed well. However, none of the state-of-the-art heterogeneous memory management designs are customized for virtualization on a real system.
27+
In this paper, we propose HMM-V, a Heterogeneous Memory Management system for Virtualization. HMM-V automatically determines page hotness and migrates pages between DRAM and NVM to achieve performance close to the DRAM system. First, HMM-V tracks memory accesses through page table manipulation, but reduces the cost by leveraging Intel page-modification logging (PML) and a multi-level queue. Second, HMM-V quantifies the ``temperature'' of page and determines the hot set with bucket-sorting. HMM-V then efficiently migrates pages with minimal access pause and handles dirty pages with the assistance of PML. Finally, HMM-V provides pooling management to balance precious DRAM across multiple VMs to maximize utilization and overall performance. HMM-V is implemented on a real system with Intel Optane DC persistent memory. The four-VM co-running results show that HMM-V outperforms NUMA balancing and hardware management (Intel Optane memory mode) by 51% and 31%, respectively.'
28+
29+
# Summary. An optional shortened abstract.
30+
summary: ''
31+
32+
tags: []
33+
34+
# Display this page in the Featured widget?
35+
featured: true
36+
37+
# Custom links (uncomment lines below)
38+
# links:
39+
# - name: Custom Link
40+
# url: http://example.org
41+
42+
url_pdf: 'https://arxiv.org/pdf/2209.13111'
43+
url_code: ''
44+
url_dataset: ''
45+
url_poster: ''
46+
url_project: ''
47+
url_slides: ''
48+
url_source: ''
49+
url_video: ''
50+
---
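A toy rendering of the bucket-sorting step the abstract mentions: order pages by an integer "temperature" and keep the hottest ones that fit in DRAM. Page numbers, temperatures, and the DRAM budget here are invented; HMM-V's real hotness metric (built from PML logs and a multi-level queue) is more involved.

```python
def pick_hot_set(page_temps, dram_pages, max_temp):
    """Bucket-sort pages by temperature, then take the hottest until DRAM is full.

    page_temps: dict mapping page number -> integer temperature in [0, max_temp].
    """
    buckets = [[] for _ in range(max_temp + 1)]
    for page, temp in page_temps.items():
        buckets[temp].append(page)

    hot_set = []
    for temp in range(max_temp, -1, -1):        # hottest buckets first
        for page in buckets[temp]:
            if len(hot_set) == dram_pages:
                return hot_set
            hot_set.append(page)
    return hot_set

# Example: 8 pages with made-up temperatures; DRAM can hold 3 of them.
temps = {0x1000: 5, 0x2000: 1, 0x3000: 7, 0x4000: 0,
         0x5000: 7, 0x6000: 2, 0x7000: 4, 0x8000: 0}
print([hex(p) for p in pick_hot_set(temps, dram_pages=3, max_temp=7)])
# -> ['0x3000', '0x5000', '0x1000']
```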
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
@article{DBLP:journals/tc/ShaZLWW22,
  author    = {Sai Sha and
               Yi Zhang and
               Yingwei Luo and
               Xiaolin Wang and
               Zhenlin Wang},
  title     = {Accelerating Address Translation for Virtualization by Leveraging
               Hardware Mode},
  journal   = {{IEEE} Trans. Computers},
  volume    = {71},
  number    = {11},
  pages     = {3047--3060},
  year      = {2022},
  url       = {https://doi.org/10.1109/TC.2022.3145671},
  doi       = {10.1109/TC.2022.3145671},
  timestamp = {Sun, 13 Nov 2022 17:52:42 +0100},
  biburl    = {https://dblp.org/rec/journals/tc/ShaZLWW22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
1+
---
2+
title: "Accelerating Address Translation for Virtualization by Leveraging Hardware Mode"
3+
4+
authors:
5+
- Sai Sha
6+
- Chuandong Li
7+
- Yingwei Luo
8+
- Xiaolin Wang
9+
- Zhenlin Wang
10+
11+
date: "2022-01-01T00:00:00Z"
12+
doi: ""
13+
14+
# Schedule page publish date (NOT publication's date).
15+
publishDate: "2022-01-01T00:00:00Z"
16+
17+
# Publication type.
18+
# Accepts a single type but formatted as a YAML list (for Hugo requirements).
19+
# Enter a publication type from the CSL standard.
20+
publication_types: ["article-journal"]
21+
22+
# Publication name and optional abbreviated publication name.
23+
publication: "*IEEE Transactions on Computers*"
24+
publication_short: ""
25+
26+
abstract: 'The overhead of memory virtualization remains nontrivial. The traditional shadow paging (TSP) resorts to a shadow page table (SPT) to achieve the native page walk speed, but page table updates require hypervisor interventions. Alternatively, nested paging enables low-overhead page table updates, but utilizes the hardware MMU to perform a long-latency two-dimensional page walk. This paper proposes new memory virtualization solutions based on hardware (machine) mode—the highest CPU privilege level in some architectures like Sunway and RISC-V. A programming interface, running in hardware mode, enables software-implementation of hardware support functions. We first propose Software-based Nested Paging (SNP), which extends the software MMU to perform a two-dimensional page walk in hardware mode. Second, we present Swift Shadow Paging (SSP), which accomplishes page table synchronization by intercepting TLB flushing in hardware mode. Finally we propose Accelerated Shadow Paging (ASP) combining SSP and SNP. ASP handles the last-level SPT page faults by walking two-dimensional page tables in hardware mode, which eliminates most hypervisor interventions. This paper systematically compares multiple memory virtualization models by analyzing their designs and evaluating their performance both on a real system and a simulator. The experiments show that the virtualization overhead of ASP is less than 4.5% for all workloads.'
27+
28+
# Summary. An optional shortened abstract.
29+
summary: ''
30+
31+
tags: []
32+
33+
# Display this page in the Featured widget?
34+
featured: true
35+
36+
# Custom links (uncomment lines below)
37+
# links:
38+
# - name: Custom Link
39+
# url: http://example.org
40+
41+
url_pdf: 'https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9693114'
42+
url_code: ''
43+
url_dataset: ''
44+
url_poster: ''
45+
url_project: ''
46+
url_slides: ''
47+
url_source: ''
48+
url_video: ''
49+
---
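To make the cost of the "long-latency two-dimensional page walk" concrete: with n-level guest and m-level host page tables, a nested walk makes (n+1)(m+1) - 1 memory references per TLB miss, versus n references for a shadow page table walked at native speed. The snippet below is just that arithmetic, not code from the paper.

```python
def nested_walk_refs(guest_levels, host_levels):
    """Memory references for one two-dimensional (nested) page walk.

    Each guest page-table pointer, plus the final guest physical address,
    must itself be translated through all host page-table levels, and each
    guest page-table entry is also read directly.
    """
    return (guest_levels + 1) * (host_levels + 1) - 1

def shadow_walk_refs(levels):
    """A shadow page table is walked directly by the MMU at native speed."""
    return levels

print(nested_walk_refs(4, 4))   # 24 references per TLB miss under nested paging
print(shadow_walk_refs(4))      # 4 references with a shadow page table
```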
