Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/menlo-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ jobs:

build-and-test:
runs-on: ${{ matrix.runs-on }}
needs: [create-draft-release]
timeout-minutes: 270
strategy:
fail-fast: false
Expand Down Expand Up @@ -285,7 +284,7 @@ jobs:
uses: actions/checkout@v3
with:
submodules: recursive

- name: Replace our Makefile
run: |
cat menlo/Makefile | tee Makefile
Expand Down Expand Up @@ -635,4 +634,4 @@ jobs:
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
asset_path: /tmp/cudart-llama-bin-win-cu11.7-x64.tar.gz
asset_name: cudart-llama-bin-win-cu11.7-x64.tar.gz
asset_content_type: application/gzip
asset_content_type: application/gzip
13 changes: 11 additions & 2 deletions ggml/src/ggml-cuda/conv2d.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ struct kernel_bounds {
int64_t x_min, x_max;
};

// Converts a single element of type T to float so it can be used in float
// arithmetic (e.g. multiply-accumulate into a float accumulator).
//
// Supported element types:
//   - __half : converted with __half2float
//   - float  : returned unchanged
//
// Any other T is rejected at compile time rather than being silently passed
// through an implicit conversion, which could narrow or change the value.
template<typename T>
__device__ __forceinline__ float to_float(const T& val) {
    if constexpr (std::is_same_v<T, __half>) {
        return __half2float(val);
    } else {
        // This branch is only instantiated when T is not __half, so the
        // assertion enforces what used to be a comment-only assumption.
        static_assert(std::is_same_v<T, float>,
                      "to_float: only float and __half element types are supported");
        return val;
    }
}

// Returns the larger of two signed 64-bit integers; when the values are
// equal, b is returned.
__device__ __forceinline__ int64_t max64(int64_t a, int64_t b) {
    if (a > b) {
        return a;
    }
    return b;
}
Expand Down Expand Up @@ -94,8 +103,8 @@ static __global__ void conv2d_kernel(const float * __restrict__ input,
const int64_t in_x = calculate_input_coord(out_x, kx, P.ST_X, P.DL_X, P.PD_X);

const float input_val = input[Layout::input_index(n, c_in, in_y, in_x, P)];
const float kernel_val = kernel[Layout::kernel_index(c_out, c_in, ky, kx, P)];
acc += (input_val * kernel_val);
const T kernel_val = kernel[Layout::kernel_index(c_out, c_in, ky, kx, P)];
acc += (input_val * to_float(kernel_val));
}
}
}
Expand Down
Loading