|
| 1 | +--- |
| 2 | +title: Build out-of-tree kernel module |
| 3 | +weight: 4 |
| 4 | + |
| 5 | +### FIXED, DO NOT MODIFY |
| 6 | +layout: learningpathall |
| 7 | +--- |
| 8 | + |
| 9 | +## Creating the Linux Kernel Module |
| 10 | + |
| 11 | +We will now learn how to create an example Linux kernel module (Character device) that demonstrates a cache miss issue caused by traversing a 2D array in column-major order. This access pattern is not cache-friendly, as it skips over most of the neighboring elements in memory during each iteration. |
| 12 | + |
| 13 | +To build the Linux kernel module, start by creating a new directory—we will call it **example_module**—in any location of your choice. Inside this directory, add two files: `mychardrv.c` and `Makefile`. |
| 14 | + |
| 15 | +**Makefile** |
| 16 | + |
| 17 | +```makefile |
# Out-of-tree build of mychardrv.ko against the Buildroot-built kernel tree.
obj-m += mychardrv.o

# Change this to your Buildroot output directory.
# Keep this comment on its own line: make does not strip the whitespace that
# precedes a trailing '#', so an inline comment would leave a space at the end
# of the value and break every path derived from it below.
BUILDROOT_OUT := /opt/rpi-linux/buildroot/output
KDIR := $(BUILDROOT_OUT)/build/linux-custom
CROSS_COMPILE := $(BUILDROOT_OUT)/host/bin/aarch64-buildroot-linux-gnu-
ARCH := arm64

# Build the module by handing control to the kernel's own build system.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) modules

# Remove the build products from this directory.
clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
| 29 | +``` |
| 30 | + |
| 31 | +{{% notice Note %}} |
| 32 | +Change **BUILDROOT_OUT** to the correct Buildroot output directory on your host machine. |
| 33 | +{{% /notice %}} |
| 34 | + |
| 35 | +**mychardrv.c** |
| 36 | + |
| 37 | +```c |
| 38 | +// SPDX-License-Identifier: GPL-2.0 |
| 39 | +#include "linux/printk.h" |
| 40 | +#include <linux/cdev.h> |
| 41 | +#include <linux/init.h> |
| 42 | +#include <linux/kernel.h> |
| 43 | +#include <linux/module.h> |
| 44 | + |
| 45 | +// Using fixed major and minor numbers just for demonstration purposes. |
| 46 | +// Major number 42 is for demo/sample uses according to |
| 47 | +// https://www.kernel.org/doc/Documentation/admin-guide/devices.txt |
| 48 | +#define MAJOR_VERSION_NUM 42 |
| 49 | +#define MINOR_VERSION_NUM 0 |
| 50 | +#define MODULE_NAME "mychardrv" |
| 51 | +#define MAX_INPUT_LEN 64 |
| 52 | + |
| 53 | +static struct cdev my_char_dev; |
| 54 | + |
| 55 | +/** |
| 56 | + * @brief Traverse a 2D matrix and calculate the sum of its elements. |
| 57 | + * |
| 58 | + * @size: The size of the matrix (number of rows and columns). |
| 59 | + * |
| 60 | + * This function allocates a 2D matrix of integers, initializes it with the sum |
| 61 | + * of its indices, and then calculates the sum of its elements by accessing them |
| 62 | + * in a cache-unfriendly column-major order. |
| 63 | + * |
| 64 | + * Return: 0 on success, or -ENOMEM if memory allocation fails. |
| 65 | + */ |
| 66 | +int char_dev_cache_traverse(long size) { |
| 67 | + int i, j; |
| 68 | + long sum = 0; |
| 69 | + |
| 70 | + int **matrix; |
| 71 | + |
| 72 | + // Allocate rows |
| 73 | + matrix = kmalloc_array(size, sizeof(int *), GFP_KERNEL); |
| 74 | + if (!matrix) |
| 75 | + return -ENOMEM; |
| 76 | + |
| 77 | + // Allocate columns and initialize matrix |
| 78 | + for (i = 0; i < size; i++) { |
| 79 | + matrix[i] = kmalloc_array(size, sizeof(int), GFP_KERNEL); |
| 80 | + if (!matrix[i]) { |
| 81 | + for (int n = 0; n < i; n++) { |
| 82 | + kfree(matrix[n]); |
| 83 | + } |
| 84 | + kfree(matrix); |
| 85 | + return -ENOMEM; |
| 86 | + } |
| 87 | + |
| 88 | + for (j = 0; j < size; j++) |
| 89 | + matrix[i][j] = i + j; |
| 90 | + } |
| 91 | + |
| 92 | + // Access in cache-UNFRIENDLY column-major order |
| 93 | + for (j = 0; j < size; j++) { |
| 94 | + for (i = 0; i < size; i++) { |
| 95 | + sum += matrix[i][j]; |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + pr_info("Sum: %ld\n", sum); |
| 100 | + |
| 101 | + // Free memory |
| 102 | + for (i = 0; i < size; i++) |
| 103 | + kfree(matrix[i]); |
| 104 | + kfree(matrix); |
| 105 | + |
| 106 | + return 0; |
| 107 | +} |
| 108 | + |
| 109 | +/** |
| 110 | + * @brief Gets the size of the list to be created from user space. |
| 111 | + * |
| 112 | + */ |
| 113 | +static ssize_t char_dev_write(struct file *file, const char *buff, |
| 114 | + size_t length, loff_t *offset) { |
| 115 | + (void)file; |
| 116 | + (void)offset; |
| 117 | + |
| 118 | + ssize_t ret = 0; |
| 119 | + char *kbuf; |
| 120 | + long size_value; |
| 121 | + |
| 122 | + // Allocate kernel buffer |
| 123 | + kbuf = kmalloc(MAX_INPUT_LEN, GFP_KERNEL); |
| 124 | + if (!kbuf) |
| 125 | + return -ENOMEM; |
| 126 | + |
| 127 | + // copy data from user space to kernel space |
| 128 | + if (copy_from_user(kbuf, buff, length)) { |
| 129 | + ret = -EFAULT; |
| 130 | + goto out; |
| 131 | + } |
| 132 | + kbuf[length] = '\0'; |
| 133 | + |
| 134 | + // Convert string to long (Base 10) |
| 135 | + ret = kstrtol(kbuf, 10, &size_value); |
| 136 | + if (ret) |
| 137 | + goto out; |
| 138 | + |
| 139 | + // Call cache traversal function |
| 140 | + ret = char_dev_cache_traverse(size_value); |
| 141 | + if (ret) |
| 142 | + goto out; |
| 143 | + |
| 144 | + ret = length; |
| 145 | + |
| 146 | +out: |
| 147 | + kfree(kbuf); |
| 148 | + return ret; |
| 149 | +} |
| 150 | + |
| 151 | +static int char_dev_open(struct inode *node, struct file *file) { |
| 152 | + (void)file; |
| 153 | + pr_info("%s is open - Major(%d) Minor(%d)\n", MODULE_NAME, |
| 154 | + MAJOR(node->i_rdev), MINOR(node->i_rdev)); |
| 155 | + return 0; |
| 156 | +} |
| 157 | + |
| 158 | +static int char_dev_release(struct inode *node, struct file *file) { |
| 159 | + (void)file; |
| 160 | + pr_info("%s is released - Major(%d) Minor(%d)\n", MODULE_NAME, |
| 161 | + MAJOR(node->i_rdev), MINOR(node->i_rdev)); |
| 162 | + return 0; |
| 163 | +} |
| 164 | + |
| 165 | +// File operations structure |
| 166 | +static const struct file_operations dev_fops = {.owner = THIS_MODULE, |
| 167 | + .open = char_dev_open, |
| 168 | + .release = char_dev_release, |
| 169 | + .write = char_dev_write}; |
| 170 | + |
| 171 | +static int __init char_dev_init(void) { |
| 172 | + int ret; |
| 173 | + // Allocate Major number |
| 174 | + ret = register_chrdev_region(MKDEV(MAJOR_VERSION_NUM, MINOR_VERSION_NUM), 1, |
| 175 | + MODULE_NAME); |
| 176 | + if (ret < 0) |
| 177 | + return ret; |
| 178 | + |
| 179 | + // Initialize cdev structure and add it to kernel |
| 180 | + cdev_init(&my_char_dev, &dev_fops); |
| 181 | + ret = cdev_add(&my_char_dev, MKDEV(MAJOR_VERSION_NUM, MINOR_VERSION_NUM), 1); |
| 182 | + |
| 183 | + if (ret < 0) { |
| 184 | + unregister_chrdev_region(MKDEV(MAJOR_VERSION_NUM, MINOR_VERSION_NUM), 1); |
| 185 | + return ret; |
| 186 | + } |
| 187 | + |
| 188 | + return ret; |
| 189 | +} |
| 190 | + |
| 191 | +static void __exit char_dev_exit(void) { |
| 192 | + cdev_del(&my_char_dev); |
| 193 | + unregister_chrdev_region(MKDEV(MAJOR_VERSION_NUM, MINOR_VERSION_NUM), 1); |
| 194 | +} |
| 195 | + |
| 196 | +module_init(char_dev_init); |
| 197 | +module_exit(char_dev_exit); |
| 198 | + |
| 199 | +MODULE_LICENSE("GPL"); |
| 200 | +MODULE_AUTHOR("Yahya Abouelseoud"); |
| 201 | +MODULE_DESCRIPTION("A simple char driver with cache misses issue"); |
| 202 | +``` |
| 203 | +
|
| 204 | +The module above receives the size of a 2D array as a string through the `char_dev_write()` function, converts it to an integer, and passes it to the `char_dev_cache_traverse()` function. This function then creates the 2D array, initializes it with simple data, traverses it in a column-major (cache-unfriendly) order, computes the sum of its elements, and prints the result to the kernel log. |
| 205 | +
|
| 206 | +## Building and Running the Kernel Module |
| 207 | +
|
| 208 | +1. To compile the kernel module, run make inside the example_module directory. This will generate the output file `mychardrv.ko`. |
| 209 | +
|
| 210 | +2. Transfer the `.ko` file to the target using the `scp` command and then insert it using the `insmod` command. After inserting the module, we create a character device node using the `mknod` command. Finally, we can test the module by writing a size value (e.g., 10000) to the device file and measuring the time taken for the operation using the `time` command. |
| 211 | +
|
| 212 | + ```bash |
| 213 | + scp mychardrv.ko root@<target-ip>:/root/ |
| 214 | + ``` |
| 215 | +
|
| 216 | + {{% notice Note %}} |
| 217 | + Replace \<target-ip> with your own target IP address |
| 218 | + {{% /notice %}} |
| 219 | +
|
| 220 | +3. To run the module on the target, we need to run the following commands on the target: |
| 221 | +
|
| 222 | + ```bash |
| 223 | + ssh root@<your-target-ip> |
| 224 | + |
| 225 | + # The following commands must be run on the target device |
| 226 | + |
| 227 | + insmod /root/mychardrv.ko |
| 228 | + mknod /dev/mychardrv c 42 0 |
| 229 | + ``` |
| 230 | +
|
| 231 | + {{% notice Note %}} |
| 232 | + 42 and 0 are the major and minor numbers we chose in our module code above |
| 233 | + {{% /notice %}} |
| 234 | +
|
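| 235 | +4. The module logs a message each time the device file is opened (for example, by the write in the next step). After that, running `dmesg` should show something like: |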
| 236 | +
|
| 237 | + ```log |
| 238 | + [12381.654983] mychardrv is open - Major(42) Minor(0) |
| 239 | + ``` |
| 240 | +
|
| 241 | +5. To make sure it's working as expected you can use the following command: |
| 242 | +
|
| 243 | + ```bash { output_lines = "2-4" } |
| 244 | + time echo '10000' > /dev/mychardrv |
| 245 | + # real 0m 38.04s |
| 246 | + # user 0m 0.00s |
| 247 | + # sys 0m 38.03s |
| 248 | + ``` |
| 249 | +
|
| 250 | + The command above passes 10000 to the module, which specifies the size of the 2D array to be created and traversed. The **echo** command takes a long time to complete (around 38 seconds) due to the cache-unfriendly traversal implemented in the `char_dev_cache_traverse()` function. |
| 251 | +
|
| 252 | +With the kernel module built, the next step is to profile it using Arm Streamline. We will use it to capture runtime behavior, highlight performance bottlenecks, and help identify issues such as the cache-unfriendly traversal in our module. |
0 commit comments