|
| 1 | +/* |
| 2 | + * Copyright LWJGL. All rights reserved. |
| 3 | + * License terms: https://www.lwjgl.org/license |
| 4 | + */ |
| 5 | +package org.lwjgl.demo.cuda; |
| 6 | + |
| 7 | +import static org.lwjgl.cuda.CU.*; |
| 8 | +import static org.lwjgl.cuda.CU40.*; |
| 9 | +import static org.lwjgl.cuda.CUGL.*; |
| 10 | +import static org.lwjgl.glfw.GLFW.*; |
| 11 | +import static org.lwjgl.opengl.GL.*; |
| 12 | +import static org.lwjgl.opengl.GL11.*; |
| 13 | +import static org.lwjgl.system.MemoryUtil.*; |
| 14 | + |
| 15 | +import java.nio.*; |
| 16 | + |
| 17 | +import org.lwjgl.*; |
| 18 | +import org.lwjgl.glfw.*; |
| 19 | +import org.lwjgl.opengl.*; |
| 20 | +import org.lwjgl.system.*; |
| 21 | + |
| 22 | +/** |
| 23 | + * CUDA/OpenGL interop example. |
| 24 | + * <p> |
| 25 | + * A device kernel function is used to fill an OpenGL texture with a constant |
| 26 | + * color and GLFW is used to display that texture in a window. |
| 27 | + * |
| 28 | + * @author Kai Burjack |
| 29 | + */ |
| 30 | +public class OpenGLExample { |
| 31 | + private static void check(int err) { |
| 32 | + if (err != 0) |
| 33 | + throw new AssertionError("Error code: " + err); |
| 34 | + } |
| 35 | + |
| 36 | + private static void run(MemoryStack s) { |
| 37 | + // Create a super simple OpenGL context and a texture |
| 38 | + glfwInit(); |
| 39 | + glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE); |
| 40 | + glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE); |
| 41 | + long window = glfwCreateWindow(512, 512, "Hello CUDA!", NULL, NULL); |
| 42 | + GLFWKeyCallback keyCallback; |
| 43 | + glfwSetKeyCallback(window, keyCallback = new GLFWKeyCallback() { |
| 44 | + @Override |
| 45 | + public void invoke(long window, int key, int scancode, int action, int mods) { |
| 46 | + if (action == GLFW_PRESS && key == GLFW_KEY_ESCAPE) |
| 47 | + glfwSetWindowShouldClose(window, true); |
| 48 | + } |
| 49 | + }); |
| 50 | + glfwMakeContextCurrent(window); |
| 51 | + createCapabilities(); |
| 52 | + Callback debugProc = GLUtil.setupDebugMessageCallback(); |
| 53 | + int tex = glGenTextures(); |
| 54 | + glBindTexture(GL_TEXTURE_2D, tex); |
| 55 | + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
| 56 | + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, (ByteBuffer) null); |
| 57 | + glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); |
| 58 | + glEnable(GL_TEXTURE_2D); |
| 59 | + |
| 60 | + // Allocate some buffers up-front |
| 61 | + IntBuffer count = s.mallocInt(1); |
| 62 | + IntBuffer dev = s.mallocInt(1); |
| 63 | + PointerBuffer ctx = s.mallocPointer(1); |
| 64 | + PointerBuffer resource = s.mallocPointer(1); |
| 65 | + PointerBuffer array = s.mallocPointer(1); |
| 66 | + PointerBuffer module = s.mallocPointer(1); |
| 67 | + PointerBuffer surfref = s.mallocPointer(1); |
| 68 | + PointerBuffer function = s.mallocPointer(1); |
| 69 | + |
| 70 | + // Initialize/load the CUDA device driver for this process |
| 71 | + check(cuInit(0)); |
| 72 | + // Check if we actually have at least one CUDA-capable device |
| 73 | + check(cuDeviceGetCount(count)); |
| 74 | + if (count.get(0) == 0) |
| 75 | + throw new AssertionError("No CUDA-capable device found"); |
| 76 | + // Obtain handle to first device |
| 77 | + check(cuDeviceGet(dev, 0)); |
| 78 | + // and create a CUDA context on that device, which will also be made |
| 79 | + // current in the calling thread (much like OpenGL's context) |
| 80 | + check(cuCtxCreate(ctx, 0, dev.get(0))); |
| 81 | + // Create the PTX source string of the module |
| 82 | + String ptx = |
| 83 | +// Minimum PTX version 1.5 to be able to use .surfref and sust |
| 84 | +".version 1.5\n" + |
| 85 | +// We make no use of actual shader model capabilities/functions, so target the lowest possible |
| 86 | +".target sm_11\n" + |
| 87 | +// Add a global reference to a surface which we will write to |
| 88 | +".global .surfref surface;\n" + |
| 89 | +// Function to write color to a surface |
| 90 | +".visible .entry fillcolor () {\n" + |
| 91 | +// Allocate some registers to compute the thread (x, y) coordinates |
| 92 | +" .reg .u32 %blockid, %blockdim, %thrid, %xidx, %yidx;\n" + |
| 93 | +// Compute the x coordinate of this thread for writing to the surface |
| 94 | +// xidx = ctaid.x * ntid.x + tid.x |
| 95 | +" mov.u32 %blockid, %ctaid.x;\n" + |
| 96 | +" mov.u32 %blockdim, %ntid.x;\n" + |
| 97 | +" mov.u32 %thrid, %tid.x;\n" + |
| 98 | +" mad.lo.u32 %xidx, %blockid, %blockdim, %thrid;\n" + |
| 99 | +// Pay close attention to the documentation of the sust instruction! |
| 100 | +// "The lowest dimension coordinate represents a byte offset into the surface and is not scaled." |
| 101 | +// So we have to multiply that by 4 in order to get the actual texel x-coordinate: |
| 102 | +" shl.b32 %xidx, %xidx, 2U;\n" + |
| 103 | +// Compute the y coordinate of this thread for writing to the surface |
| 104 | +// yidx = ctaid.y * ntid.y + tid.y |
| 105 | +" mov.u32 %blockid, %ctaid.y;\n" + |
| 106 | +" mov.u32 %blockdim, %ntid.y;\n" + |
| 107 | +" mov.u32 %thrid, %tid.y;\n" + |
| 108 | +" mad.lo.u32 %yidx, %blockid, %blockdim, %thrid;\n" + |
| 109 | +// Write yellow to the surface |
| 110 | +" sust.b.2d.v4.b8.trap [surface, {%xidx, %yidx}], {255, 255, 0, 255};\n" + |
| 111 | +"}"; |
| 112 | + // Register the OpenGL texture as a CUDA resource |
| 113 | + check(cuGraphicsGLRegisterImage(resource, tex, GL_TEXTURE_2D, |
| 114 | + // Flag to tell that CUDA will overwrite the image |
| 115 | + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD | |
| 116 | + // Flag to tell that this resource is used via a surface reference |
| 117 | + CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST)); |
| 118 | + // Map the resource to be used by further CUDA graphics functions |
| 119 | + // Without this, cuGraphicsSubResourceGetMappedArray() will not work |
| 120 | + check(cuGraphicsMapResources(resource, NULL)); |
| 121 | + // Get the first image of the OpenGL texture as a CUDA array |
| 122 | + check(cuGraphicsSubResourceGetMappedArray(array, resource.get(0), 0, 0)); |
| 123 | + // Unmap the resource |
| 124 | + check(cuGraphicsUnmapResources(resource, NULL)); |
| 125 | + // Load the PTX module |
| 126 | + check(cuModuleLoadData(module, s.ASCII(ptx))); |
| 127 | + // Obtain handle to the `surface` surface reference of the module |
| 128 | + check(cuModuleGetSurfRef(surfref, module.get(0), "surface")); |
| 129 | + // Assign the array to the surface reference used by the kernel function |
| 130 | + check(cuSurfRefSetArray(surfref.get(0), array.get(0), 0)); |
| 131 | + // Obtain handle to the `fillcolor` function of the module |
| 132 | + check(cuModuleGetFunction(function, module.get(0), "fillcolor")); |
| 133 | + // Execute the kernel function |
| 134 | + check(cuLaunchKernel(function.get(0), |
| 135 | + 64, 64, 1, // <- 64x64x1 blocks |
| 136 | + 8, 8, 1, // <- 8x8x1 threads per block |
| 137 | + 0, // <- no shared memory |
| 138 | + 0, // <- use default stream |
| 139 | + null, // <- no function parameters |
| 140 | + null)); // <- no extra parameters |
| 141 | + // Clean-up CUDA resources |
| 142 | + check(cuCtxDestroy(ctx.get(0))); |
| 143 | + |
| 144 | + // Show window and render the texture |
| 145 | + glfwShowWindow(window); |
| 146 | + while (!glfwWindowShouldClose(window)) { |
| 147 | + glBegin(GL_QUADS); |
| 148 | + glTexCoord2f(0, 0); glVertex2f(-1, -1); |
| 149 | + glTexCoord2f(1, 0); glVertex2f(+1, -1); |
| 150 | + glTexCoord2f(1, 1); glVertex2f(+1, +1); |
| 151 | + glTexCoord2f(0, 1); glVertex2f(-1, +1); |
| 152 | + glEnd(); |
| 153 | + glfwSwapBuffers(window); |
| 154 | + glfwPollEvents(); |
| 155 | + } |
| 156 | + glfwDestroyWindow(window); |
| 157 | + glfwTerminate(); |
| 158 | + if (debugProc != null) |
| 159 | + debugProc.free(); |
| 160 | + keyCallback.free(); |
| 161 | + GL.setCapabilities(null); |
| 162 | + } |
| 163 | + |
| 164 | + public static void main(String[] args) { |
| 165 | + try (MemoryStack frame = MemoryStack.stackPush()) { |
| 166 | + run(frame); |
| 167 | + } |
| 168 | + } |
| 169 | +} |
0 commit comments