| 1 | +swiotlb: remove alloc_size argument to swiotlb_tbl_map_single() |
| 2 | + |
| 3 | +jira LE-1907 |
| 4 | +cve CVE-2024-35814 |
| 5 | +Rebuild_History Non-Buildable kernel-4.18.0-553.16.1.el8_10 |
| 6 | +commit-author Michael Kelley < [email protected]> |
| 7 | +commit 327e2c97c46a4d971c5450a9d05b4a673f46c4da |
| 8 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 9 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 10 | +ciq/ciq_backports/kernel-4.18.0-553.16.1.el8_10/327e2c97.failed |
| 11 | + |
| 12 | +Currently swiotlb_tbl_map_single() takes alloc_align_mask and |
| 13 | +alloc_size arguments to specify an swiotlb allocation that is larger |
| 14 | +than mapping_size. This larger allocation is used solely by |
| 15 | +iommu_dma_map_page() to handle untrusted devices that should not have |
| 16 | +DMA visibility to memory pages that are partially used for unrelated |
| 17 | +kernel data. |
| 18 | + |
| 19 | +Having two arguments to specify the allocation is redundant. While |
| 20 | +alloc_align_mask naturally specifies the alignment of the starting |
| 21 | +address of the allocation, it can also implicitly specify the size |
| 22 | +by rounding up the mapping_size to that alignment. |
| 23 | + |
| 24 | +Additionally, the current approach has an edge case bug. |
| 25 | +iommu_dma_map_page() already does the rounding up to compute the |
| 26 | +alloc_size argument. But swiotlb_tbl_map_single() then calculates the |
| 27 | +alignment offset based on the DMA min_align_mask, and adds that offset to |
| 28 | +alloc_size. If the offset is non-zero, the addition may result in a value |
| 29 | +that is larger than the max the swiotlb can allocate. If the rounding up |
| 30 | +is done _after_ the alignment offset is added to the mapping_size (and |
| 31 | +the original mapping_size conforms to the value returned by |
| 32 | +swiotlb_max_mapping_size), then the max that the swiotlb can allocate |
| 33 | +will not be exceeded. |
| 34 | + |
| 35 | +In view of these issues, simplify the swiotlb_tbl_map_single() interface |
| 36 | +by removing the alloc_size argument. Most call sites pass the same value |
| 37 | +for mapping_size and alloc_size, and they pass alloc_align_mask as zero. |
| 38 | +Just remove the redundant argument from these callers, as they will see |
| 39 | +no functional change. For iommu_dma_map_page() also remove the alloc_size |
| 40 | +argument, and have swiotlb_tbl_map_single() compute the alloc_size by |
| 41 | +rounding up mapping_size after adding the offset based on min_align_mask. |
| 42 | +This has the side effect of fixing the edge case bug but with no other |
| 43 | +functional change. |
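
For reference, the shape of the conversion at a plain caller (mirroring the xen-swiotlb hunk further below) is:

```c
/* Before: mapping_size and alloc_size passed separately (here equal),
 * with alloc_align_mask of zero.
 */
map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);

/* After: alloc_size is gone; swiotlb_tbl_map_single() derives it
 * internally as ALIGN(mapping_size + offset, alloc_align_mask + 1).
 */
map = swiotlb_tbl_map_single(dev, phys, size, 0, dir, attrs);
```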
| 44 | + |
| 45 | +Also add a sanity test on the alloc_align_mask. While IOMMU code |
| 46 | +currently ensures the granule is not larger than PAGE_SIZE, if that |
| 47 | +guarantee were to be removed in the future, the downstream effect on the |
| 48 | +swiotlb might go unnoticed until strange allocation failures occurred. |
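
The check, as it appears in the swiotlb hunk below, is a one-time warning rather than a hard failure:

```c
/* Quoted from the hunk below: warn if the requested alignment exceeds
 * what the PAGE_SIZE-aligned default pool can always satisfy.
 */
dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK,
	      "Alloc alignment may prevent fulfilling requests with max mapping_size\n");
```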
| 49 | + |
| 50 | +Tested on an ARM64 system with 16K page size and some kernel test-only |
| 51 | +hackery to allow modifying the DMA min_align_mask and the granule size |
| 52 | +that becomes the alloc_align_mask. Tested these combinations with a |
| 53 | +variety of original memory addresses and sizes, including those that |
| 54 | +reproduce the edge case bug: |
| 55 | + |
| 56 | + * 4K granule and 0 min_align_mask |
| 57 | + * 4K granule and 0xFFF min_align_mask (4K - 1) |
| 58 | + * 16K granule and 0xFFF min_align_mask |
| 59 | + * 64K granule and 0xFFF min_align_mask |
| 60 | + * 64K granule and 0x3FFF min_align_mask (16K - 1) |
| 61 | + |
| 62 | +With the changes, all combinations pass. |
| 63 | + |
| 64 | + Signed-off-by: Michael Kelley < [email protected]> |
| 65 | + Reviewed-by: Petr Tesarik < [email protected]> |
| 66 | + Signed-off-by: Christoph Hellwig < [email protected]> |
| 67 | +(cherry picked from commit 327e2c97c46a4d971c5450a9d05b4a673f46c4da) |
| 68 | + Signed-off-by: Jonathan Maple < [email protected]> |
| 69 | + |
| 70 | +# Conflicts: |
| 71 | +# kernel/dma/swiotlb.c |
| 72 | +diff --cc kernel/dma/swiotlb.c |
| 73 | +index c0e227dcb45e,046da973a7e2..000000000000 |
| 74 | +--- a/kernel/dma/swiotlb.c |
| 75 | ++++ b/kernel/dma/swiotlb.c |
| 76 | +@@@ -713,16 -1312,71 +713,72 @@@ static unsigned long mem_used(struct io |
| 77 | + return used; |
| 78 | + } |
| 79 | + |
| 80 | +++<<<<<<< HEAD |
| 81 | +++======= |
| 82 | ++ /** |
| 83 | ++ * mem_used() - get number of used slots in an allocator |
| 84 | ++ * @mem: Software IO TLB allocator. |
| 85 | ++ * |
| 86 | ++ * The result is not accurate, because there is no locking of individual |
| 87 | ++ * areas. |
| 88 | ++ * |
| 89 | ++ * Return: Approximate number of used slots. |
| 90 | ++ */ |
| 91 | ++ static unsigned long mem_used(struct io_tlb_mem *mem) |
| 92 | ++ { |
| 93 | ++ #ifdef CONFIG_SWIOTLB_DYNAMIC |
| 94 | ++ struct io_tlb_pool *pool; |
| 95 | ++ unsigned long used = 0; |
| 96 | ++ |
| 97 | ++ rcu_read_lock(); |
| 98 | ++ list_for_each_entry_rcu(pool, &mem->pools, node) |
| 99 | ++ used += mem_pool_used(pool); |
| 100 | ++ rcu_read_unlock(); |
| 101 | ++ |
| 102 | ++ return used; |
| 103 | ++ #else |
| 104 | ++ return mem_pool_used(&mem->defpool); |
| 105 | ++ #endif |
| 106 | ++ } |
| 107 | ++ |
| 108 | ++ #endif /* CONFIG_DEBUG_FS */ |
| 109 | ++ |
| 110 | ++ /** |
| 111 | ++ * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area |
| 112 | ++ * @dev: Device which maps the buffer. |
| 113 | ++ * @orig_addr: Original (non-bounced) physical IO buffer address |
| 114 | ++ * @mapping_size: Requested size of the actual bounce buffer, excluding |
| 115 | ++ * any pre- or post-padding for alignment |
| 116 | ++ * @alloc_align_mask: Required start and end alignment of the allocated buffer |
| 117 | ++ * @dir: DMA direction |
| 118 | ++ * @attrs: Optional DMA attributes for the map operation |
| 119 | ++ * |
| 120 | ++ * Find and allocate a suitable sequence of IO TLB slots for the request. |
| 121 | ++ * The allocated space starts at an alignment specified by alloc_align_mask, |
| 122 | ++ * and the size of the allocated space is rounded up so that the total amount |
| 123 | ++ * of allocated space is a multiple of (alloc_align_mask + 1). If |
| 124 | ++ * alloc_align_mask is zero, the allocated space may be at any alignment and |
| 125 | ++ * the size is not rounded up. |
| 126 | ++ * |
| 127 | ++ * The returned address is within the allocated space and matches the bits |
| 128 | ++ * of orig_addr that are specified in the DMA min_align_mask for the device. As |
| 129 | ++ * such, this returned address may be offset from the beginning of the allocated |
| 130 | ++ * space. The bounce buffer space starting at the returned address for |
| 131 | ++ * mapping_size bytes is initialized to the contents of the original IO buffer |
| 132 | ++ * area. Any pre-padding (due to an offset) and any post-padding (due to |
| 133 | ++ * rounding-up the size) is not initialized. |
| 134 | ++ */ |
| 135 | +++>>>>>>> 327e2c97c46a (swiotlb: remove alloc_size argument to swiotlb_tbl_map_single()) |
| 136 | + phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, |
| 137 | +- size_t mapping_size, size_t alloc_size, |
| 138 | +- unsigned int alloc_align_mask, enum dma_data_direction dir, |
| 139 | +- unsigned long attrs) |
| 140 | ++ size_t mapping_size, unsigned int alloc_align_mask, |
| 141 | ++ enum dma_data_direction dir, unsigned long attrs) |
| 142 | + { |
| 143 | + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; |
| 144 | + - unsigned int offset; |
| 145 | + - struct io_tlb_pool *pool; |
| 146 | + + unsigned int offset = swiotlb_align_offset(dev, orig_addr); |
| 147 | + unsigned int i; |
| 148 | ++ size_t size; |
| 149 | + int index; |
| 150 | + phys_addr_t tlb_addr; |
| 151 | + - unsigned short pad_slots; |
| 152 | + |
| 153 | + if (!mem || !mem->nslabs) { |
| 154 | + dev_warn_ratelimited(dev, |
| 155 | +@@@ -733,14 -1387,19 +789,24 @@@ |
| 156 | + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) |
| 157 | + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); |
| 158 | + |
| 159 | +- if (mapping_size > alloc_size) { |
| 160 | +- dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", |
| 161 | +- mapping_size, alloc_size); |
| 162 | +- return (phys_addr_t)DMA_MAPPING_ERROR; |
| 163 | +- } |
| 164 | ++ /* |
| 165 | ++ * The default swiotlb memory pool is allocated with PAGE_SIZE |
| 166 | ++ * alignment. If a mapping is requested with larger alignment, |
| 167 | ++ * the mapping may be unable to use the initial slot(s) in all |
| 168 | ++ * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request |
| 169 | ++ * of or near the maximum mapping size would always fail. |
| 170 | ++ */ |
| 171 | ++ dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK, |
| 172 | ++ "Alloc alignment may prevent fulfilling requests with max mapping_size\n"); |
| 173 | + |
| 174 | +++<<<<<<< HEAD |
| 175 | + + index = swiotlb_find_slots(dev, orig_addr, |
| 176 | + + alloc_size + offset, alloc_align_mask); |
| 177 | +++======= |
| 178 | ++ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); |
| 179 | ++ size = ALIGN(mapping_size + offset, alloc_align_mask + 1); |
| 180 | ++ index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool); |
| 181 | +++>>>>>>> 327e2c97c46a (swiotlb: remove alloc_size argument to swiotlb_tbl_map_single()) |
| 182 | + if (index == -1) { |
| 183 | + if (!(attrs & DMA_ATTR_NO_WARN)) |
| 184 | + dev_warn_ratelimited(dev, |
| 185 | +@@@ -754,15 -1413,21 +820,25 @@@ |
| 186 | + * This is needed when we sync the memory. Then we sync the buffer if |
| 187 | + * needed. |
| 188 | + */ |
| 189 | +++<<<<<<< HEAD |
| 190 | + + for (i = 0; i < nr_slots(alloc_size + offset); i++) |
| 191 | + + mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); |
| 192 | + + tlb_addr = slot_addr(mem->start, index) + offset; |
| 193 | +++======= |
| 194 | ++ pad_slots = offset >> IO_TLB_SHIFT; |
| 195 | ++ offset &= (IO_TLB_SIZE - 1); |
| 196 | ++ index += pad_slots; |
| 197 | ++ pool->slots[index].pad_slots = pad_slots; |
| 198 | ++ for (i = 0; i < (nr_slots(size) - pad_slots); i++) |
| 199 | ++ pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); |
| 200 | ++ tlb_addr = slot_addr(pool->start, index) + offset; |
| 201 | +++>>>>>>> 327e2c97c46a (swiotlb: remove alloc_size argument to swiotlb_tbl_map_single()) |
| 202 | + /* |
| 203 | + - * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy |
| 204 | + - * the original buffer to the TLB buffer before initiating DMA in order |
| 205 | + - * to preserve the original's data if the device does a partial write, |
| 206 | + - * i.e. if the device doesn't overwrite the entire buffer. Preserving |
| 207 | + - * the original data, even if it's garbage, is necessary to match |
| 208 | + - * hardware behavior. Use of swiotlb is supposed to be transparent, |
| 209 | + - * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. |
| 210 | + + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig |
| 211 | + + * to the tlb buffer, if we knew for sure the device will |
| 212 | + + * overwirte the entire current content. But we don't. Thus |
| 213 | + + * unconditional bounce may prevent leaking swiotlb content (i.e. |
| 214 | + + * kernel memory) to user-space. |
| 215 | + */ |
| 216 | + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); |
| 217 | + return tlb_addr; |
| 218 | +diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c |
| 219 | +index e531d2c4ba52..c9497d9d08ae 100644 |
| 220 | +--- a/drivers/iommu/dma-iommu.c |
| 221 | ++++ b/drivers/iommu/dma-iommu.c |
| 222 | +@@ -1025,7 +1025,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, |
| 223 | + } |
| 224 | + |
| 225 | + aligned_size = iova_align(iovad, size); |
| 226 | +- phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, |
| 227 | ++ phys = swiotlb_tbl_map_single(dev, phys, size, |
| 228 | + iova_mask(iovad), dir, attrs); |
| 229 | + |
| 230 | + if (phys == DMA_MAPPING_ERROR) |
| 231 | +diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c |
| 232 | +index d027bb25e533..f3e78470c8c3 100644 |
| 233 | +--- a/drivers/xen/swiotlb-xen.c |
| 234 | ++++ b/drivers/xen/swiotlb-xen.c |
| 235 | +@@ -387,7 +387,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, |
| 236 | + */ |
| 237 | + trace_swiotlb_bounced(dev, dev_addr, size); |
| 238 | + |
| 239 | +- map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs); |
| 240 | ++ map = swiotlb_tbl_map_single(dev, phys, size, 0, dir, attrs); |
| 241 | + if (map == (phys_addr_t)DMA_MAPPING_ERROR) |
| 242 | + return DMA_MAPPING_ERROR; |
| 243 | + |
| 244 | +diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h |
| 245 | +index 2ef25e6fa1b4..5fc11fcaf6fd 100644 |
| 246 | +--- a/include/linux/swiotlb.h |
| 247 | ++++ b/include/linux/swiotlb.h |
| 248 | +@@ -42,7 +42,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, |
| 249 | + extern void __init swiotlb_update_mem_attributes(void); |
| 250 | + |
| 251 | + phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, |
| 252 | +- size_t mapping_size, size_t alloc_size, |
| 253 | ++ size_t mapping_size, |
| 254 | + unsigned int alloc_aligned_mask, enum dma_data_direction dir, |
| 255 | + unsigned long attrs); |
| 256 | + |
| 257 | +* Unmerged path kernel/dma/swiotlb.c |