|
25 | 25 | #include <linux/kasan.h> |
26 | 26 | #include <linux/bpf_verifier.h> |
27 | 27 | #include <linux/uaccess.h> |
| 28 | +#include <linux/decompress/generic.h> |
28 | 29 |
|
29 | 30 | #include "../../lib/kstrtox.h" |
30 | 31 |
|
@@ -3714,13 +3715,238 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign) |
3714 | 3715 | return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX); |
3715 | 3716 | } |
3716 | 3717 |
|
| 3718 | +#ifdef CONFIG_KEXEC_PE_IMAGE |
| 3719 | + |
| 3720 | +#define MAX_UNCOMPRESSED_BUF_SIZE (1 << 28) |
| 3721 | +/* A chunk must be large enough to hold the data from a single flush() call. */ |
| 3722 | +#define CHUNK_SIZE (1 << 23) |
| 3723 | + |
| 3724 | +/* |
| 3725 | + * For now, a single global allocator serves all decompression. If needed |
| 3726 | + * later, extend the decompress_fn prototype to give each task its own allocator. |
| 3727 | + */ |
| 3728 | +static DEFINE_MUTEX(output_buf_mutex); |
| 3729 | + |
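| | +/* |
| | + * Chunk-by-chunk decompression state: pages[] records every page handed out |
| | + * so far, pg_idx counts them, and [chunk_start, chunk_start + chunk_size) |
| | + * is the currently mapped chunk, with chunk_cur as its write cursor. |
| | + */ |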
| 3730 | +struct decompress_mem_allocator { |
| 3731 | + struct page **pages; |
| 3732 | + unsigned int pg_idx; |
| 3733 | + void *chunk_start; |
| 3734 | + unsigned int chunk_size; |
| 3735 | + void *chunk_cur; |
| 3736 | +}; |
| 3737 | + |
| 3738 | +static struct decompress_mem_allocator dcmpr_allocator; |
| 3739 | + |
| 3740 | +/* |
| 3741 | + * Set up an active chunk to hold partially decompressed data. |
| 3742 | + */ |
| 3743 | +static void *vmap_decompressed_chunk(void) |
| 3744 | +{ |
| 3745 | + struct decompress_mem_allocator *a = &dcmpr_allocator; |
| 3746 | + unsigned int i, pg_cnt = a->chunk_size >> PAGE_SHIFT; |
| 3747 | + struct page **pg_start = &a->pages[a->pg_idx]; |
| 3748 | + |
| 3749 | + for (i = 0; i < pg_cnt; i++) { |
| | + struct page *pg = alloc_page(GFP_KERNEL | __GFP_ACCOUNT); |
| | + |
| | + /* On failure, pages already allocated stay tracked in a->pages[] and are freed by decompress_mem_allocator_fini() */ |
| | + if (unlikely(!pg)) |
| | + return NULL; |
| 3750 | + a->pages[a->pg_idx++] = pg; |
| | + } |
| 3751 | + |
| 3752 | + return vmap(pg_start, pg_cnt, VM_MAP, PAGE_KERNEL); |
| 3753 | +} |
| 3754 | + |
| 3755 | +/* |
| 3756 | + * Map the scattered pages holding the decompressed data at a single |
| 3757 | + * contiguous virtual address and hand them over to @range. |
| 3758 | + */ |
| 3759 | +static int decompress_mem_allocator_handover(struct decompress_mem_allocator *a, |
| 3760 | + struct mem_range_result *range) |
| 3761 | +{ |
| 3762 | + unsigned long pg_array_sz = a->pg_idx * sizeof(struct page *); |
| 3763 | + |
| 3764 | + range->pages = vmalloc(pg_array_sz); |
| 3765 | + if (!range->pages) |
| 3766 | + return -ENOMEM; |
| 3767 | + |
| 3768 | + range->pg_cnt = a->pg_idx; |
| 3769 | + memcpy(range->pages, a->pages, pg_array_sz); |
| 3770 | + range->buf = vmap(range->pages, range->pg_cnt, VM_MAP, PAGE_KERNEL); |
| 3771 | + if (!range->buf) { |
| 3772 | + vfree(range->pages); |
| 3773 | + return -ENOMEM; |
| 3774 | + } |
| 3775 | + /* |
| 3776 | + * Free the tracking array only; the pages themselves are freed when |
| 3777 | + * the mem_range_result is released. |
| 3778 | + */ |
| 3779 | + vfree(a->pages); |
| 3780 | + a->pages = NULL; |
| 3781 | + |
| 3782 | + /* vmap-ed */ |
| 3783 | + range->alloc_type = TYPE_VMAP; |
| 3784 | + range->buf_sz = a->pg_idx << PAGE_SHIFT; |
| 3785 | + range->data_sz = range->buf_sz - a->chunk_size; |
| 3786 | + range->data_sz += a->chunk_cur - a->chunk_start; |
| 3787 | + |
| 3788 | + return 0; |
| 3789 | +} |
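| | + |
| | +/* |
| | + * Worked example of the accounting above: with CHUNK_SIZE = 8 MiB, a 20 MiB |
| | + * decompressed stream fills two chunks and leaves 4 MiB in a third, so |
| | + * buf_sz = 24 MiB and data_sz = 24 MiB - 8 MiB + 4 MiB = 20 MiB. |
| | + */ |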
| 3790 | + |
| 3791 | +static int decompress_mem_allocator_init( |
| 3792 | + struct decompress_mem_allocator *allocator, |
| 3793 | + unsigned int chunk_size) |
| 3794 | +{ |
| 3795 | + unsigned long sz = (MAX_UNCOMPRESSED_BUF_SIZE >> PAGE_SHIFT) * sizeof(struct page *); |
| 3796 | + |
| 3797 | + allocator->pages = __vmalloc(sz, GFP_KERNEL | __GFP_ACCOUNT); |
| 3798 | + if (!allocator->pages) |
| 3799 | + return -ENOMEM; |
| 3800 | + |
| 3801 | + allocator->pg_idx = 0; |
| 3802 | + allocator->chunk_start = NULL; |
| 3803 | + allocator->chunk_size = chunk_size; |
| 3804 | + allocator->chunk_cur = NULL; |
| 3805 | + return 0; |
| 3806 | +} |
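| | + |
| | +/* |
| | + * Sizing note: on a typical 64-bit configuration with 4 KiB pages, the |
| | + * pages[] array above is (1 << 28 >> 12) * 8 bytes = 512 KiB, hence |
| | + * __vmalloc() rather than kmalloc(). |
| | + */ |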
| 3807 | + |
| 3808 | +static void decompress_mem_allocator_fini(struct decompress_mem_allocator *allocator) |
| 3809 | +{ |
| 3810 | + unsigned int i; |
| 3811 | + |
| 3812 | + /* unmap the active chunk */ |
| 3813 | + if (allocator->chunk_start) |
| 3814 | + vunmap(allocator->chunk_start); |
| 3815 | + if (allocator->pages) { |
| 3816 | + for (i = 0; i < allocator->pg_idx; i++) |
| 3817 | + __free_pages(allocator->pages[i], 0); |
| 3818 | + vfree(allocator->pages); |
| 3819 | + } |
| 3820 | +} |
| 3821 | + |
| 3822 | +/* |
| 3823 | + * This is a callback for decompress_fn. |
| 3824 | + * |
| 3825 | + * It copies the partially decompressed data in [buf, buf + len) into the |
| 3826 | + * active chunk. If the active chunk does not have enough room, retire it |
| 3827 | + * and activate a new chunk to hold the remaining data. |
| 3828 | + */ |
| 3829 | +static long flush(void *buf, unsigned long len) |
| 3830 | +{ |
| 3831 | + struct decompress_mem_allocator *a = &dcmpr_allocator; |
| 3832 | + long free, copied = 0; |
| 3833 | + |
| 3834 | + /* First-time chunk allocation */ |
| 3835 | + if (unlikely(!a->chunk_start)) { |
| 3836 | + a->chunk_start = a->chunk_cur = vmap_decompressed_chunk(); |
| 3837 | + if (unlikely(!a->chunk_start)) |
| 3838 | + return -1; |
| 3839 | + } |
| 3840 | + |
| 3841 | + free = a->chunk_start + a->chunk_size - a->chunk_cur; |
| 3842 | + BUG_ON(free < 0); |
| 3843 | + if (free < len) { |
| 3844 | + /* |
| 3845 | + * If the total size would exceed MAX_UNCOMPRESSED_BUF_SIZE, |
| 3846 | + * return -1 to signal the decompressor that something is |
| 3847 | + * wrong. |
| 3848 | + */ |
| 3849 | + if (unlikely(a->pg_idx >= MAX_UNCOMPRESSED_BUF_SIZE >> PAGE_SHIFT)) |
| 3850 | + return -1; |
| 3851 | + memcpy(a->chunk_cur, buf, free); |
| 3852 | + copied += free; |
| 3853 | + a->chunk_cur += free; |
| 3854 | + len -= free; |
| 3855 | + /* |
| 3856 | + * When retiring the active chunk, release its virtual address |
| 3857 | + * but keep the underlying pages and their contents. |
| 3858 | + */ |
| 3859 | + vunmap(a->chunk_start); |
| 3860 | + a->chunk_start = a->chunk_cur = vmap_decompressed_chunk(); |
| 3861 | + if (unlikely(!a->chunk_start)) |
| 3862 | + return -1; |
| 3863 | + } |
| 3864 | + memcpy(a->chunk_cur, buf, len); |
| 3865 | + copied += len; |
| 3866 | + a->chunk_cur += len; |
| 3867 | + return copied; |
| 3868 | +} |
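| | + |
| | +/* |
| | + * Error callback for decompress_fn. The in-kernel decompressors invoke |
| | + * error() on failure without checking it for NULL, so a real callback |
| | + * must be supplied. |
| | + */ |
| | +static void decompress_error(char *x) |
| | +{ |
| | + pr_err("decompress: %s\n", x); |
| | +} |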
| 3869 | + |
| 3870 | +__bpf_kfunc struct mem_range_result *bpf_decompress(char *image_gz_payload, int image_gz_sz) |
| 3871 | +{ |
| 3872 | + struct decompress_mem_allocator *a = &dcmpr_allocator; |
| 3873 | + decompress_fn decompressor; |
| 3874 | + struct mem_cgroup *memcg, *old_memcg; |
| 3875 | + struct mem_range_result *range; |
| 3876 | + const char *name; |
| 3877 | + char *input_buf; |
| 3878 | + int ret; |
| 3879 | + |
| 3880 | + memcg = get_mem_cgroup_from_current(); |
| 3881 | + old_memcg = set_active_memcg(memcg); |
| 3882 | + range = mem_range_result_alloc(); |
| 3883 | + if (!range) { |
| 3884 | + pr_err("failed to allocate mem_range_result\n"); |
| 3885 | + goto error; |
| 3886 | + } |
| 3887 | + |
| 3888 | + input_buf = __vmalloc(image_gz_sz, GFP_KERNEL | __GFP_ACCOUNT); |
| 3889 | + if (!input_buf) { |
| 3890 | + kfree(range); |
| 3891 | + pr_err("failed to allocate input buffer\n"); |
| 3892 | + goto error; |
| 3893 | + } |
| 3894 | + |
| 3895 | + ret = copy_from_kernel_nofault(input_buf, image_gz_payload, image_gz_sz); |
| 3896 | + if (ret < 0) { |
| 3897 | + kfree(range); |
| 3898 | + vfree(input_buf); |
| 3899 | + pr_err("failed to copy from %p, size: 0x%x\n", |
| 3900 | + image_gz_payload, image_gz_sz); |
| 3901 | + goto error; |
| 3902 | + } |
| 3903 | + |
| 3904 | + mutex_lock(&output_buf_mutex); |
| 3905 | + ret = decompress_mem_allocator_init(a, CHUNK_SIZE); |
| | + if (ret) { |
| | + mutex_unlock(&output_buf_mutex); |
| | + kfree(range); |
| | + vfree(input_buf); |
| | + pr_err("failed to initialize the decompress allocator\n"); |
| | + goto error; |
| | + } |
| 3906 | + decompressor = decompress_method(input_buf, image_gz_sz, &name); |
| 3907 | + if (!decompressor) { |
| 3908 | + vfree(input_buf); |
| 3909 | + pr_err("cannot find decompress method\n"); |
| 3910 | + ret = -EINVAL; |
| 3911 | + /* the fail label finalizes the allocator, drops the lock and frees range */ |
| 3912 | + goto fail; |
| | + } |
| 3913 | + ret = decompressor(input_buf, image_gz_sz, NULL, flush, |
| 3914 | + NULL, NULL, decompress_error); |
| 3915 | + |
| 3916 | + vfree(input_buf); |
| 3917 | + if (ret == 0) { |
| 3918 | + ret = decompress_mem_allocator_handover(a, range); |
| 3919 | + if (ret) |
| 3920 | + goto fail; |
| 3921 | + range->status = 0; |
| 3922 | + mem_cgroup_tryget(memcg); |
| 3923 | + range->memcg = memcg; |
| 3925 | + } |
| 3926 | +fail: |
| 3927 | + decompress_mem_allocator_fini(a); |
| 3928 | + mutex_unlock(&output_buf_mutex); |
| 3929 | + if (ret) { |
| 3930 | + kfree(range); |
| 3931 | + range = NULL; |
| 3932 | + pr_err("Decompress error\n"); |
| 3933 | + } |
| 3934 | + |
| 3935 | +error: |
| 3936 | + set_active_memcg(old_memcg); |
| 3937 | + mem_cgroup_put(memcg); |
| 3938 | + return range; |
| 3939 | +} |
| 3940 | +#endif |
| 3941 | + |
3717 | 3942 | __bpf_kfunc_end_defs(); |
3718 | 3943 |
|
3719 | 3944 | BTF_KFUNCS_START(generic_btf_ids) |
3720 | 3945 | #ifdef CONFIG_CRASH_DUMP |
3721 | 3946 | BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) |
3722 | 3947 | #endif |
3723 | 3948 | #ifdef CONFIG_KEXEC_PE_IMAGE |
| 3949 | +BTF_ID_FLAGS(func, bpf_decompress, KF_TRUSTED_ARGS | KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) |
3724 | 3950 | BTF_ID_FLAGS(func, bpf_mem_range_result_put, KF_RELEASE | KF_SLEEPABLE) |
3725 | 3951 | BTF_ID_FLAGS(func, bpf_copy_to_kernel, KF_TRUSTED_ARGS | KF_SLEEPABLE) |
3726 | 3952 | #endif |
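
For context, a minimal sketch of the BPF side (not part of this patch): how a program might call the new kfunc and release the acquired result. The kfunc names and flags match the patch; the context struct, its fields, and the section name are illustrative assumptions.

/* Hypothetical BPF program exercising bpf_decompress(); struct
 * pe_image_ctx and its fields are invented for illustration. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern struct mem_range_result *
bpf_decompress(char *image_gz_payload, int image_gz_sz) __ksym;
extern void bpf_mem_range_result_put(struct mem_range_result *result) __ksym;

SEC("syscall")
int decompress_image(struct pe_image_ctx *ctx)
{
	struct mem_range_result *res;

	/* KF_ACQUIRE: on success the program owns a reference */
	res = bpf_decompress(ctx->payload, ctx->payload_sz);
	if (!res)	/* KF_RET_NULL: decompression can fail */
		return 1;

	/* ... consume res->buf / res->data_sz here ... */

	/* KF_RELEASE: every acquired result must be put exactly once */
	bpf_mem_range_result_put(res);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";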
|