Skip to content

Commit 2057745

Browse files
committed
Add CUDA/OpenGL interop example writing to a texture
1 parent ac6a2d8 commit 2057745

File tree

1 file changed: 169 additions and 0 deletions
@@ -0,0 +1,169 @@
1+
/*
2+
* Copyright LWJGL. All rights reserved.
3+
* License terms: https://www.lwjgl.org/license
4+
*/
5+
package org.lwjgl.demo.cuda;
6+
7+
import static org.lwjgl.cuda.CU.*;
8+
import static org.lwjgl.cuda.CU40.*;
9+
import static org.lwjgl.cuda.CUGL.*;
10+
import static org.lwjgl.glfw.GLFW.*;
11+
import static org.lwjgl.opengl.GL.*;
12+
import static org.lwjgl.opengl.GL11.*;
13+
import static org.lwjgl.system.MemoryUtil.*;
14+
15+
import java.nio.*;
16+
17+
import org.lwjgl.*;
18+
import org.lwjgl.glfw.*;
19+
import org.lwjgl.opengl.*;
20+
import org.lwjgl.system.*;
21+
22+
/**
23+
* CUDA/OpenGL interop example.
24+
* <p>
25+
* A device kernel function is used to fill an OpenGL texture with a constant
26+
* color and GLFW is used to display that texture in a window.
27+
*
28+
* @author Kai Burjack
29+
*/
30+
public class OpenGLExample {
31+
private static void check(int err) {
32+
if (err != 0)
33+
throw new AssertionError("Error code: " + err);
34+
}
35+
36+
private static void run(MemoryStack s) {
37+
// Create a super simple OpenGL context and a texture
38+
glfwInit();
39+
glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE);
40+
glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
41+
long window = glfwCreateWindow(512, 512, "Hello CUDA!", NULL, NULL);
42+
GLFWKeyCallback keyCallback;
43+
glfwSetKeyCallback(window, keyCallback = new GLFWKeyCallback() {
44+
@Override
45+
public void invoke(long window, int key, int scancode, int action, int mods) {
46+
if (action == GLFW_PRESS && key == GLFW_KEY_ESCAPE)
47+
glfwSetWindowShouldClose(window, true);
48+
}
49+
});
50+
glfwMakeContextCurrent(window);
51+
createCapabilities();
52+
Callback debugProc = GLUtil.setupDebugMessageCallback();
53+
int tex = glGenTextures();
54+
glBindTexture(GL_TEXTURE_2D, tex);
55+
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
56+
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, (ByteBuffer) null);
57+
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
58+
glEnable(GL_TEXTURE_2D);
59+
60+
// Allocate some buffers up-front
61+
IntBuffer count = s.mallocInt(1);
62+
IntBuffer dev = s.mallocInt(1);
63+
PointerBuffer ctx = s.mallocPointer(1);
64+
PointerBuffer resource = s.mallocPointer(1);
65+
PointerBuffer array = s.mallocPointer(1);
66+
PointerBuffer module = s.mallocPointer(1);
67+
PointerBuffer surfref = s.mallocPointer(1);
68+
PointerBuffer function = s.mallocPointer(1);
69+
70+
// Initialize/load the CUDA device driver for this process
71+
check(cuInit(0));
72+
// Check if we actually have at least one CUDA-capable device
73+
check(cuDeviceGetCount(count));
74+
if (count.get(0) == 0)
75+
throw new AssertionError("No CUDA-capable device found");
76+
// Obtain handle to first device
77+
check(cuDeviceGet(dev, 0));
78+
// and create a CUDA context on that device, which will also be made
79+
// current in the calling thread (much like OpenGL's context)
80+
check(cuCtxCreate(ctx, 0, dev.get(0)));
81+
// Create the PTX source string of the module
82+
String ptx =
83+
// Minimum PTX version 1.5 to be able to use .surfref and sust
84+
".version 1.5\n" +
85+
// We make no use of actual shader model capabilities/functions, so target the lowest possible
86+
".target sm_11\n" +
87+
// Add a global reference to a surface which we will write to
88+
".global .surfref surface;\n" +
89+
// Function to write color to a surface
90+
".visible .entry fillcolor () {\n" +
91+
// Allocate some registers to compute the thread (x, y) coordinates
92+
" .reg .u32 %blockid, %blockdim, %thrid, %xidx, %yidx;\n" +
93+
// Compute the x coordinate of this thread for writing to the surface
94+
// xidx = ctaid.x * ntid.x + tid.x
95+
" mov.u32 %blockid, %ctaid.x;\n" +
96+
" mov.u32 %blockdim, %ntid.x;\n" +
97+
" mov.u32 %thrid, %tid.x;\n" +
98+
" mad.lo.u32 %xidx, %blockid, %blockdim, %thrid;\n" +
99+
// Pay close attention to the documentation of the sust instruction!
100+
// "The lowest dimension coordinate represents a byte offset into the surface and is not scaled."
101+
// So we have to multiply that by 4 in order to get the actual texel x-coordinate:
102+
" shl.b32 %xidx, %xidx, 2U;\n" +
103+
// Compute the y coordinate of this thread for writing to the surface
104+
// yidx = ctaid.y * ntid.y + tid.y
105+
" mov.u32 %blockid, %ctaid.y;\n" +
106+
" mov.u32 %blockdim, %ntid.y;\n" +
107+
" mov.u32 %thrid, %tid.y;\n" +
108+
" mad.lo.u32 %yidx, %blockid, %blockdim, %thrid;\n" +
109+
// Write yellow to the surface
110+
" sust.b.2d.v4.b8.trap [surface, {%xidx, %yidx}], {255, 255, 0, 255};\n" +
111+
"}";
112+
// Register the OpenGL texture as a CUDA resource
113+
check(cuGraphicsGLRegisterImage(resource, tex, GL_TEXTURE_2D,
114+
// Flag to tell that CUDA will overwrite the image
115+
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD |
116+
// Flag to tell that this resource is used via a surface reference
117+
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST));
118+
// Map the resource to be used by further CUDA graphics functions
119+
// Without this, cuGraphicsSubResourceGetMappedArray() will not work
120+
check(cuGraphicsMapResources(resource, NULL));
121+
// Get the first image of the OpenGL texture as a CUDA array
122+
check(cuGraphicsSubResourceGetMappedArray(array, resource.get(0), 0, 0));
123+
// Unmap the resource
124+
check(cuGraphicsUnmapResources(resource, NULL));
125+
// Load the PTX module
126+
check(cuModuleLoadData(module, s.ASCII(ptx)));
127+
// Obtain handle to the `surface` surface reference of the module
128+
check(cuModuleGetSurfRef(surfref, module.get(0), "surface"));
129+
// Assign the array to the surface reference used by the kernel function
130+
check(cuSurfRefSetArray(surfref.get(0), array.get(0), 0));
131+
// Obtain handle to the `fillcolor` function of the module
132+
check(cuModuleGetFunction(function, module.get(0), "fillcolor"));
133+
// Execute the kernel function
134+
check(cuLaunchKernel(function.get(0),
135+
64, 64, 1, // <- 64x64x1 blocks
136+
8, 8, 1, // <- 8x8x1 threads per block
137+
0, // <- no shared memory
138+
0, // <- use default stream
139+
null, // <- no function parameters
140+
null)); // <- no extra parameters
141+
// Clean-up CUDA resources
142+
check(cuCtxDestroy(ctx.get(0)));
143+
144+
// Show window and render the texture
145+
glfwShowWindow(window);
146+
while (!glfwWindowShouldClose(window)) {
147+
glBegin(GL_QUADS);
148+
glTexCoord2f(0, 0); glVertex2f(-1, -1);
149+
glTexCoord2f(1, 0); glVertex2f(+1, -1);
150+
glTexCoord2f(1, 1); glVertex2f(+1, +1);
151+
glTexCoord2f(0, 1); glVertex2f(-1, +1);
152+
glEnd();
153+
glfwSwapBuffers(window);
154+
glfwPollEvents();
155+
}
156+
glfwDestroyWindow(window);
157+
glfwTerminate();
158+
if (debugProc != null)
159+
debugProc.free();
160+
keyCallback.free();
161+
GL.setCapabilities(null);
162+
}
163+
164+
public static void main(String[] args) {
165+
try (MemoryStack frame = MemoryStack.stackPush()) {
166+
run(frame);
167+
}
168+
}
169+
}

0 commit comments

Comments
 (0)