diff --git a/README.md b/README.md
index 110697ce..a78a4798 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,87 @@ CUDA Path Tracer
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+Nuofan Xu, Windows 10, AMD Ryzen 3800X, RTX2080 Super
 
-### (TODO: Your README)
+A CUDA-accelerated path tracer: a global-illumination renderer.
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+![](imgs/941.PNG)
 
+*(The walls are implemented as diffuse surfaces, and the ball is implemented as a perfectly specular surface.)*
+
+
+## Features
+- Basic pathtracer
+- Refractive, reflective, and diffuse surfaces
+- Antialiasing
+- Depth of Field
+- Direct Lighting
+- Stream Compaction
+
+### Material Surfaces
+
+The pathtracer supports the following material surfaces:
+
+* diffuse: light scatters uniformly from the surface
+* specular/reflective (perfect): light reflects perfectly off the surface.
+* refractive (using [Schlick's Approximation][wiki-schlick] of the Fresnel
+  effect to blend reflective and refractive light bounces).
+
+  [wiki-schlick]: https://en.wikipedia.org/wiki/Schlick%27s_approximation
+
+### Triangle checking ###
+![](imgs/triang.PNG)
+
+### Refractive Ball ###
+![](imgs/refractive.PNG)
+*(The ball is rendered with an index of refraction of 1.5.)*
+
+### Direct Light ### 
+To get a direct lighting effect in our images, we want the last bounce of each ray to hit a light source. We do this by randomly selecting a point on a randomly selected light and setting the direction of the ray toward that point. The first image is rendered without direct lighting; the second, with direct lighting enabled, is noticeably brighter overall.
+![](imgs/direct_light.PNG)
+![](imgs/941.PNG)
+
+### Depth of Field
+
+Depth of field simulates a physical aperture by varying the origin and direction
+of rays shot from the camera. DOF is defined with a focal distance from the
+camera, at which objects are perfectly in focus, and an aperture radius,
+defining the amount of variance added to each ray.
+
+![](imgs/blur.PNG)
+![](imgs/941.PNG)
+
+*(no DOF; DOF with distance 5.5, aperture 0.4; around 1000 iterations each)*
+
+### Antialiasing
+
+Camera rays are shot from the camera's position in the direction of a pixel.
+Antialiasing jitters the target of the ray within a pixel. This is a brief
+computation that happens once per pixel per iteration; it has little impact on
+performance.
+
+![](imgs/antialiasing.PNG)
+
+*(Enlargement of antialiasing along wall corners (right) vs no antialiasing (left))*
+
+
+### Stream Compaction
+
+This pathtracer uses `thrust::stable_partition` to perform stream compaction on dead
+rays. Reference: https://pbr-book.org/. The performance gain using stream compaction is analyzed using open scenes (room that has the front wall removed) vs closed scenes (room enclosed by walls as shown in previous images) and strong ceiling lighting (10x10, entire ceiling) and normal lighting (6x6).
+
+![](imgs/stream_compact.PNG)
+
+As we can see from the graph, the type of scene has a large effect on the number of rays alive in a scene, as expected. The type of scene (closed/open) tends to define the trend line for how many rays terminate at each bounce (by leaving the scene). The type of light determines how many rays terminate on the very first bounce, but has only a small effect on the trend line of rays terminating afterwards. The effect of stream compaction, however, becomes obvious as the bounce depth increases regardless of the scene, and at a depth of 14 we can expect a performance increase of at least 50%, assuming that processing a "dying" ray costs about the same as processing a live one.
+
+## Run
+
+To build: `make`
+
+To run: `build/cis565_path_tracer SCENE_FILE.txt`.
+
+Some sample scene files are found in `scenes/`.
+
+## Feedback
+
+Any feedback regarding the correctness of code or the pathtracer in general is welcome.
\ No newline at end of file
diff --git a/imgs/941.PNG b/imgs/941.PNG
new file mode 100644
index 00000000..8028dff2
Binary files /dev/null and b/imgs/941.PNG differ
diff --git a/imgs/antialiased.PNG b/imgs/antialiased.PNG
new file mode 100644
index 00000000..1afd29fe
Binary files /dev/null and b/imgs/antialiased.PNG differ
diff --git a/imgs/antialiasing.PNG b/imgs/antialiasing.PNG
new file mode 100644
index 00000000..b941a76c
Binary files /dev/null and b/imgs/antialiasing.PNG differ
diff --git a/imgs/blur.PNG b/imgs/blur.PNG
new file mode 100644
index 00000000..98e804fc
Binary files /dev/null and b/imgs/blur.PNG differ
diff --git a/imgs/cornell.PNG b/imgs/cornell.PNG
new file mode 100644
index 00000000..41534f8c
Binary files /dev/null and b/imgs/cornell.PNG differ
diff --git a/imgs/cornell317.PNG b/imgs/cornell317.PNG
new file mode 100644
index 00000000..f6348882
Binary files /dev/null and b/imgs/cornell317.PNG differ
diff --git a/imgs/cornell663.PNG b/imgs/cornell663.PNG
new file mode 100644
index 00000000..d904f1e9
Binary files /dev/null and b/imgs/cornell663.PNG differ
diff --git a/imgs/depth_aliasing.PNG b/imgs/depth_aliasing.PNG
new file mode 100644
index 00000000..6a36c6d4
Binary files /dev/null and b/imgs/depth_aliasing.PNG differ
diff --git a/imgs/direct_light.PNG b/imgs/direct_light.PNG
new file mode 100644
index 00000000..b2502c15
Binary files /dev/null and b/imgs/direct_light.PNG differ
diff --git a/imgs/refractive.PNG b/imgs/refractive.PNG
new file mode 100644
index 00000000..d520f1c1
Binary files /dev/null and b/imgs/refractive.PNG differ
diff --git a/imgs/stream_compact.PNG b/imgs/stream_compact.PNG
new file mode 100644
index 00000000..55a78ce0
Binary files /dev/null and b/imgs/stream_compact.PNG differ
diff --git a/imgs/triang.PNG b/imgs/triang.PNG
new file mode 100644
index 00000000..066c6592
Binary files /dev/null and b/imgs/triang.PNG differ
diff --git a/scenes/cornell.txt b/scenes/cornell.txt
index 83ff8202..ce91d487 100644
--- a/scenes/cornell.txt
+++ b/scenes/cornell.txt
@@ -43,9 +43,9 @@ MATERIAL 4
 RGB         .98 .98 .98
 SPECEX      0
 SPECRGB     .98 .98 .98
-REFL        1
-REFR        0
-REFRIOR     0
+REFL        0
+REFR        1
+REFRIOR     1.5
 EMITTANCE   0
 
 // Camera
diff --git a/scenes/diff.txt b/scenes/diff.txt
new file mode 100644
index 00000000..65b9ae05
--- /dev/null
+++ b/scenes/diff.txt
@@ -0,0 +1,171 @@
+// Emissive material (light)
+MATERIAL 0
+RGB         1 1 1
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   5
+
+// Diffuse white
+MATERIAL 1
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse red
+MATERIAL 2
+RGB         .85 .35 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Diffuse green
+MATERIAL 3
+RGB         .35 .85 .35
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Specular white
+MATERIAL 4
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .98 .98 .98
+REFL        1
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Glass
+MATERIAL 5
+RGB         1.0 1.0 1.0
+SPECEX      0
+SPECRGB     1.0 1.0 1.0
+REFL        0
+REFR        1
+REFRIOR     1.5
+EMITTANCE   0
+
+// Blue Glass
+MATERIAL 6
+RGB         .98 .98 .98
+SPECEX      0
+SPECRGB     .85 .85 .98
+REFL        0
+REFR        1
+REFRIOR     1.65
+EMITTANCE   0
+
+// Diffuse Blue
+MATERIAL 7
+RGB         .35 .85 .85
+SPECEX      0
+SPECRGB     0 0 0
+REFL        0
+REFR        0
+REFRIOR     0
+EMITTANCE   0
+
+// Camera
+CAMERA
+RES         800 800
+FOVY        45
+ITERATIONS  5000
+DEPTH       8
+FILE        cornell
+EYE         0.0 5 10.5
+LOOKAT      0 5 0
+UP          0 1 0
+
+
+// Ceiling light
+OBJECT 0
+cube
+material 0
+TRANS       0 10 0
+ROTAT       0 0 0
+SCALE       3 .3 3
+
+// Floor
+OBJECT 1
+cube
+material 1
+TRANS       0 0 0
+ROTAT       0 0 0
+SCALE       16 .01 16
+
+// Ceiling
+OBJECT 2
+cube
+material 1
+TRANS       0 10 0
+ROTAT       0 0 90
+SCALE       .01 16 16
+
+// Back wall
+OBJECT 3
+cube
+material 1
+TRANS       0 5 -5
+ROTAT       0 90 0
+SCALE       .01 16 16
+
+// Left wall
+OBJECT 4
+cube
+material 2
+TRANS       -8 5 0
+ROTAT       0 0 0
+SCALE       .01 16 16
+
+// Right wall
+OBJECT 5
+cube
+material 3
+TRANS       8 5 0
+ROTAT       0 0 0
+SCALE       .01 16 16
+
+// Sphere
+//OBJECT 6
+//sphere
+//material 6
+//TRANS       0 5 5
+//ROTAT       0 0 0
+//SCALE       3 3 3
+
+// Sphere 2
+//OBJECT 7
+//sphere
+//material 7
+//TRANS       -3 2 -1
+//ROTAT       0 0 0
+//SCALE       3 3 3
+
+// Sphere 3
+//OBJECT 8
+//sphere
+//material 6
+//TRANS       2 3 6
+//ROTAT       0 0 0
+//SCALE       3 3 3
+
+//outside light
+OBJECT 6
+cube
+material 0
+TRANS       0 4 12
+ROTAT       90 0 0
+SCALE       3 .3 3
diff --git a/scenes/sphere.txt b/scenes/sphere.txt
index a74b5458..e6ec7acf 100644
--- a/scenes/sphere.txt
+++ b/scenes/sphere.txt
@@ -1,6 +1,6 @@
 // Emissive material (light)
 MATERIAL 0
-RGB         1 1 1
+RGB         0 1 0
 SPECEX      0
 SPECRGB     0 0 0
 REFL        0
@@ -25,4 +25,14 @@ sphere
 material 0
 TRANS       0 0 0
 ROTAT       0 0 0
-SCALE       3 3 3
+SCALE       1 1 1
+
+// mesh
+OBJECT 7
+cube.obj
+material 0
+FILENAME cube.obj
+TRANS 2 2 1
+ROTAT 0 0 0
+SCALE 3 3 3
+
diff --git a/src/glslUtility.cpp b/src/glslUtility.cpp
index ae28ec41..80035d14 100644
--- a/src/glslUtility.cpp
+++ b/src/glslUtility.cpp
@@ -15,22 +15,22 @@ namespace glslUtility {
 
 // embedded passthrough shaders so that default passthrough shaders don't need to be loaded
 static std::string passthroughVS =
-    "   attribute vec4 Position; \n"
-    "   attribute vec2 Texcoords; \n"
-    "   varying vec2 v_Texcoords; \n"
-    "   \n"
-    "   void main(void){ \n"
-    "       v_Texcoords = Texcoords; \n"
-    "       gl_Position = Position; \n"
-    "   }";
+    "	attribute vec4 Position; \n"
+    "	attribute vec2 Texcoords; \n"
+    "	varying vec2 v_Texcoords; \n"
+    "	\n"
+    "	void main(void){ \n"
+    "		v_Texcoords = Texcoords; \n"
+    "		gl_Position = Position; \n"
+    "	}";
 static std::string passthroughFS =
-    "   varying vec2 v_Texcoords; \n"
-    "   \n"
-    "   uniform sampler2D u_image; \n"
-    "   \n"
-    "   void main(void){ \n"
-    "       gl_FragColor = texture2D(u_image, v_Texcoords); \n"
-    "   }";
+    "	varying vec2 v_Texcoords; \n"
+    "	\n"
+    "	uniform sampler2D u_image; \n"
+    "	\n"
+    "	void main(void){ \n"
+    "		gl_FragColor = texture2D(u_image, v_Texcoords); \n"
+    "	}";
 
 typedef struct {
     GLuint vertex;
diff --git a/src/glslUtility.hpp b/src/glslUtility.hpp
index d84433d1..6b856fb7 100644
--- a/src/glslUtility.hpp
+++ b/src/glslUtility.hpp
@@ -11,7 +11,7 @@ namespace glslUtility {
 
 GLuint createDefaultProgram(const char *attributeLocations[], GLuint numberOfLocations);
 GLuint createProgram(const char *vertexShaderPath, const char *fragmentShaderPath,
-                     const char *attributeLocations[], GLuint numberOfLocations);
+	const char *attributeLocations[], GLuint numberOfLocations);
 }
 
-#endif
+#endif
\ No newline at end of file
diff --git a/src/interactions.h b/src/interactions.h
index f969e458..da0a8f0f 100644
--- a/src/interactions.h
+++ b/src/interactions.h
@@ -47,11 +47,11 @@ glm::vec3 calculateRandomDirectionInHemisphere(
  * A perfect specular surface scatters in the reflected ray direction.
  * In order to apply multiple effects to one surface, probabilistically choose
  * between them.
- *
+ * 
  * The visual effect you want is to straight-up add the diffuse and specular
  * components. You can do this in a few ways. This logic also applies to
  * combining other types of materias (such as refractive).
- *
+ * 
  * - Always take an even (50/50) split between a each effect (a diffuse bounce
  *   and a specular bounce), but divide the resulting color of either branch
  *   by its probability (0.5), to counteract the chance (0.5) of the branch
@@ -66,9 +66,60 @@ glm::vec3 calculateRandomDirectionInHemisphere(
  *
  * You may need to change the parameter list for your purposes!
  */
+
+__host__ __device__
+float schlickEquation(float ior, float n, float cos) { // Schlick's approximation of the Fresnel reflectance
+    const float ratio = (n - ior) / (n + ior);
+    const float r0 = ratio * ratio; // value returned when cos == 1
+    return r0 + (1.f - r0) * glm::pow(1.f - cos, 5.f);
+}
+
+__host__ __device__
+void refractScatter(PathSegment& path, const Material& m, glm::vec3 intersect, glm::vec3 normal, thrust::default_random_engine& rng) { // scatter `path` at `intersect` off a refractive material: reflect or refract, then tint by m.specular.color
+    thrust::uniform_real_distribution<float> u01(0, 1);
+    float num = u01(rng); // random sample compared against the Schlick reflectance below
+    float n = 1.f; // index on the side the ray arrives from; 1 unless swapped below
+    float probability;
+    glm::vec3 normal2 = normal; // normal used for reflect/refract; flipped below when the ray travels along `normal`
+    float ior = m.indexOfRefraction; // index on the side the ray is heading into
+    
+    float cos = glm::clamp(glm::dot(path.ray.direction, normal), -1.f, 1.f);
+
+    if (cos >= 0.f) { // direction and normal point the same way: treated as the ray leaving the material
+        normal2 = -normal;
+        n = ior;
+        ior = 1.f;
+    }
+    else {
+        cos = glm::abs(cos); // keep the cosine non-negative for the formulas below
+    }
+
+    glm::vec3 reflect = glm::normalize(glm::reflect(path.ray.direction, normal2));
+    float x = n / ior; // index ratio handed to glm::refract
+    float sin = glm::sqrt(glm::max(0.f, 1.f - cos * cos));
+
+    if (x * sin < 1.f) { // a refracted direction exists (no total internal reflection)
+        //schlick equation
+        probability = schlickEquation(ior, n, cos);
+
+        if (num < probability) { // reflect with probability equal to the Schlick reflectance
+            path.ray.direction = reflect;
+        }
+        else {
+            path.ray.direction = glm::refract(path.ray.direction, normal2, x);
+        }
+    }
+    else { // total internal reflection: always reflect
+        path.ray.direction = reflect;
+    }
+
+    path.ray.origin = intersect + (path.ray.direction * 0.01f); // start slightly along the new direction; presumably to avoid re-hitting the same surface
+    path.color *= m.specular.color;
+}
+
 __host__ __device__
 void scatterRay(
-        PathSegment & pathSegment,
+		PathSegment & pathSegment,
         glm::vec3 intersect,
         glm::vec3 normal,
         const Material &m,
@@ -76,4 +127,23 @@ void scatterRay(
     // TODO: implement this.
     // A basic implementation of pure-diffuse shading will just call the
     // calculateRandomDirectionInHemisphere defined above.
+        //specular
+        if (m.hasReflective > 0) {
+            // unit direction
+            glm::vec3 dir_specular = glm::normalize(glm::reflect(pathSegment.ray.direction, normal)); 
+            pathSegment.ray.direction = dir_specular;
+            pathSegment.ray.origin = intersect + 0.0001f * normal;
+            pathSegment.color *= m.specular.color;
+        }
+        else if (m.hasRefractive > 0) {
+            //refractive
+            refractScatter(pathSegment, m, intersect, normal, rng);
+        }
+        else {
+            //pure diffuse
+            glm::vec3 dir_diffuse = calculateRandomDirectionInHemisphere(normal, rng);
+            pathSegment.ray.direction = dir_diffuse;
+            pathSegment.ray.origin = intersect + 0.0001f * normal;
+            pathSegment.color *= m.color;
+        }
 }
diff --git a/src/intersections.h b/src/intersections.h
index b1504071..a4903858 100644
--- a/src/intersections.h
+++ b/src/intersections.h
@@ -142,3 +142,18 @@ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r,
 
     return glm::length(r.origin - intersectionPoint);
 }
+
+__host__ __device__ float triangleIntersectionTest(Geom triangle, Ray r, glm::vec3& intersectionPoint, glm::vec3& normal, bool& outside) {
+    glm::vec3 pt1 = glm::vec3(triangle.transform * glm::vec4(triangle.triangle.pt1.pos, 1.0f));
+    glm::vec3 pt2 = glm::vec3(triangle.transform * glm::vec4(triangle.triangle.pt2.pos, 1.0f));
+    glm::vec3 pt3 = glm::vec3(triangle.transform * glm::vec4(triangle.triangle.pt3.pos, 1.0f));
+    // Ray/triangle test on the transformed vertices. Returns -1 on a miss; `outside` is never modified.
+    glm::vec3 inter; // (u, v, t) from GLM: u weights pt2, v weights pt3, t is the value returned to the caller
+    bool intersects = glm::intersectRayTriangle(r.origin, r.direction, pt1, pt2, pt3, inter);
+    if (!intersects) return -1.f;
+    // pt1 takes the remaining barycentric weight (1 - u - v); it must not be weighted by u.
+    float w = 1.0f - inter.x - inter.y;
+    intersectionPoint = w * pt1 + inter.x * pt2 + inter.y * pt3;
+    normal = glm::normalize(glm::cross(pt2 - pt1, pt3 - pt1)); // geometric normal from the winding pt1 -> pt2 -> pt3
+    return inter.z;
+}
diff --git a/src/pathtrace.cu b/src/pathtrace.cu
index 056e1467..90c03213 100644
--- a/src/pathtrace.cu
+++ b/src/pathtrace.cu
@@ -16,6 +16,12 @@
 
 #define ERRORCHECK 1
 
+#define ANTI_ALIASING 0
+#define CACHE_BOUNCE 0
+#define SORT_MATERIALS 0
+#define DEPTH_OF_FIELD 0
+#define DIRECT_LIGHTING 1
+
 #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
 #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
 void checkCUDAErrorFn(const char *msg, const char *file, int line) {
@@ -65,6 +71,7 @@ __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
         pbo[index].y = color.y;
         pbo[index].z = color.z;
     }
+    // checkCUDAError("sendImageToPBO");
 }
 
 static Scene * hst_scene = NULL;
@@ -73,6 +80,10 @@ static Geom * dev_geoms = NULL;
 static Material * dev_materials = NULL;
 static PathSegment * dev_paths = NULL;
 static ShadeableIntersection * dev_intersections = NULL;
+static ShadeableIntersection* dev_intersection_first_bounce = NULL;
+#if DIRECT_LIGHTING
+static Geom* dev_lights = NULL;
+#endif
 // TODO: static variables for device memory, any extra info you need, etc
 // ...
 
@@ -84,33 +95,50 @@ void pathtraceInit(Scene *scene) {
     cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
     cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
 
-    cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
+  	cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
 
-    cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
-    cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
+  	cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
+  	cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
 
-    cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
-    cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
-
-    cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
-    cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
+  	cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
+  	cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
 
+  	cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
+  	cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
     // TODO: initialize any extra device memeory you need
+#if CACHE_BOUNCE || SORT_MATERIALS
+    cudaMalloc(&dev_intersection_first_bounce, pixelcount * sizeof(ShadeableIntersection)); // must use the declared name; `dev_first_bounce` does not exist
+    cudaMemset(dev_intersection_first_bounce, 0, pixelcount * sizeof(ShadeableIntersection));
+#endif
+
+#if DIRECT_LIGHTING
+    cudaMalloc(&dev_lights, scene->lights.size() * sizeof(Geom));
+    cudaMemcpy(dev_lights, scene->lights.data(), scene->lights.size() * sizeof(Geom), cudaMemcpyHostToDevice);
+#endif
 
     checkCUDAError("pathtraceInit");
+
 }
 
 void pathtraceFree() {
     cudaFree(dev_image);  // no-op if dev_image is null
-    cudaFree(dev_paths);
-    cudaFree(dev_geoms);
-    cudaFree(dev_materials);
-    cudaFree(dev_intersections);
+  	cudaFree(dev_paths);
+  	cudaFree(dev_geoms);
+  	cudaFree(dev_materials);
+  	cudaFree(dev_intersections); // NOTE(review): dev_lights (DIRECT_LIGHTING) and the first-bounce buffer are not freed here
     // TODO: clean up any extra device memory you created
 
     checkCUDAError("pathtraceFree");
 }
 
+
+__host__ __device__
+glm::vec3 pointOnPlane(Geom light, thrust::default_random_engine& rng) {
+    thrust::uniform_real_distribution<float> unit(0, 1);
+    glm::vec2 sample(unit(rng), unit(rng)); // random point in the unit square
+    glm::vec4 local(sample - glm::vec2(0.5f), 0.f, 1.f); // centred on the origin, on the light's local z = 0 plane
+    return glm::vec3(light.transform * local);
+}
 /**
 * Generate PathSegments with rays from the camera through the screen into the
 * scene, which is the first bounce of rays.
@@ -121,95 +149,91 @@ void pathtraceFree() {
 */
 __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments)
 {
-    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
-    int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+	int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+	int y = (blockIdx.y * blockDim.y) + threadIdx.y;
 
-    if (x < cam.resolution.x && y < cam.resolution.y) {
-        int index = x + (y * cam.resolution.x);
-        PathSegment & segment = pathSegments[index];
+	if (x < cam.resolution.x && y < cam.resolution.y) {
+		int index = x + (y * cam.resolution.x);
+		PathSegment & segment = pathSegments[index];
 
-        segment.ray.origin = cam.position;
+		segment.ray.origin = cam.position;
     segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
 
-        // TODO: implement antialiasing by jittering the ray
-        segment.ray.direction = glm::normalize(cam.view
-            - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
-            - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
-            );
+		// TODO: implement antialiasing by jittering the ray
+		segment.ray.direction = glm::normalize(cam.view
+			- cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f)
+			- cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f)
+			);
 
-        segment.pixelIndex = index;
-        segment.remainingBounces = traceDepth;
-    }
+		segment.pixelIndex = index;
+		segment.remainingBounces = traceDepth;
+	}
 }
 
 // TODO:
 // computeIntersections handles generating ray intersections ONLY.
 // Generating new rays is handled in your shader(s).
 // Feel free to modify the code below.
-__global__ void computeIntersections(
-    int depth
-    , int num_paths
-    , PathSegment * pathSegments
-    , Geom * geoms
-    , int geoms_size
-    , ShadeableIntersection * intersections
-    )
+__global__ void computeIntersections(int depth, int num_paths, PathSegment * pathSegments, 
+Geom * geoms, int geoms_size, ShadeableIntersection * intersections)
 {
-    int path_index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (path_index < num_paths)
-    {
-        PathSegment pathSegment = pathSegments[path_index];
-
-        float t;
-        glm::vec3 intersect_point;
-        glm::vec3 normal;
-        float t_min = FLT_MAX;
-        int hit_geom_index = -1;
-        bool outside = true;
-
-        glm::vec3 tmp_intersect;
-        glm::vec3 tmp_normal;
-
-        // naive parse through global geoms
-
-        for (int i = 0; i < geoms_size; i++)
-        {
-            Geom & geom = geoms[i];
-
-            if (geom.type == CUBE)
-            {
-                t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
-            }
-            else if (geom.type == SPHERE)
-            {
-                t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
-            }
-            // TODO: add more intersection tests here... triangle? metaball? CSG?
-
-            // Compute the minimum t from the intersection tests to determine what
-            // scene geometry object was hit first.
-            if (t > 0.0f && t_min > t)
-            {
-                t_min = t;
-                hit_geom_index = i;
-                intersect_point = tmp_intersect;
-                normal = tmp_normal;
-            }
-        }
-
-        if (hit_geom_index == -1)
-        {
-            intersections[path_index].t = -1.0f;
-        }
-        else
-        {
-            //The ray hits something
-            intersections[path_index].t = t_min;
-            intersections[path_index].materialId = geoms[hit_geom_index].materialid;
-            intersections[path_index].surfaceNormal = normal;
-        }
-    }
+	int path_index = blockIdx.x * blockDim.x + threadIdx.x;
+
+	if (path_index < num_paths)
+	{
+		PathSegment pathSegment = pathSegments[path_index];
+
+		float t;
+		glm::vec3 intersect_point;
+		glm::vec3 normal;
+		float t_min = FLT_MAX;
+		int hit_geom_index = -1;
+		bool outside = true;
+
+		glm::vec3 tmp_intersect;
+		glm::vec3 tmp_normal;
+
+		// naive parse through global geoms
+
+		for (int i = 0; i < geoms_size; i++)
+		{
+			Geom & geom = geoms[i];
+
+			if (geom.type == CUBE)
+			{
+				t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+			}
+			else if (geom.type == SPHERE)
+			{
+				t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+			}
+			// TODO: add more intersection tests here... triangle? metaball? CSG?
+      else if (geom.type == TRIANGLE){
+        t = triangleIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside);
+      }
+			// Compute the minimum t from the intersection tests to determine what
+			// scene geometry object was hit first.
+			if (t > 0.0f && t_min > t)
+			{
+				t_min = t;
+				hit_geom_index = i;
+				intersect_point = tmp_intersect;
+				normal = tmp_normal;
+			}
+		}
+
+		if (hit_geom_index == -1)
+		{
+			intersections[path_index].t = -1.0f;
+		}
+		else
+		{
+			//The ray hits something
+			intersections[path_index].t = t_min;
+			intersections[path_index].materialId = geoms[hit_geom_index].materialid;
+			intersections[path_index].surfaceNormal = normal;
+		}
+	}
 }
 
 // LOOK: "fake" shader demonstrating what you might do with the info in
@@ -221,13 +245,7 @@ __global__ void computeIntersections(
 // Note that this shader does NOT do a BSDF evaluation!
 // Your shaders should handle that - this can allow techniques such as
 // bump mapping.
-__global__ void shadeFakeMaterial (
-  int iter
-  , int num_paths
-    , ShadeableIntersection * shadeableIntersections
-    , PathSegment * pathSegments
-    , Material * materials
-    )
+__global__ void shadeFakeMaterial (int iter, int num_paths, ShadeableIntersection * shadeableIntersections, PathSegment * pathSegments, Material * materials)
 {
   int idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num_paths)
@@ -238,7 +256,7 @@ __global__ void shadeFakeMaterial (
       // LOOK: this is how you use thrust's RNG! Please look at
       // makeSeededRandomEngine as well.
       thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
-      thrust::uniform_real_distribution<float> u01(0, 1);
+      // thrust::uniform_real_distribution<float> u01(0, 1);
 
       Material material = materials[intersection.materialId];
       glm::vec3 materialColor = material.color;
@@ -246,14 +264,21 @@ __global__ void shadeFakeMaterial (
       // If the material indicates that the object was a light, "light" the ray
       if (material.emittance > 0.0f) {
         pathSegments[idx].color *= (materialColor * material.emittance);
+        pathSegments[idx].remainingBounces = 0;
+      } 
+      else if (pathSegments[idx].remainingBounces == 1) {
+          pathSegments[idx].remainingBounces -= 1;
+          pathSegments[idx].color = glm::vec3(0.0f);
       }
       // Otherwise, do some pseudo-lighting computation. This is actually more
       // like what you would expect from shading in a rasterizer like OpenGL.
       // TODO: replace this! you should be able to start with basically a one-liner
       else {
-        float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
-        pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
-        pathSegments[idx].color *= u01(rng); // apply some noise because why not
+        scatterRay(pathSegments[idx], pathSegments[idx].ray.origin + pathSegments[idx].ray.direction * intersection.t, intersection.surfaceNormal,
+              material, rng);
+        // float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
+        // pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
+        // pathSegments[idx].color *= u01(rng); // apply some noise because why not
       }
     // If there was no intersection, color the ray black.
     // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
@@ -261,6 +286,7 @@ __global__ void shadeFakeMaterial (
     // This can be useful for post-processing and image compositing.
     } else {
       pathSegments[idx].color = glm::vec3(0.0f);
+      pathSegments[idx].remainingBounces = 0;
     }
   }
 }
@@ -268,15 +294,92 @@ __global__ void shadeFakeMaterial (
 // Add the current iteration's output to the overall image
 __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths)
 {
-    int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+	int index = (blockIdx.x * blockDim.x) + threadIdx.x;
+
+	if (index < nPaths)
+	{
+		PathSegment iterationPath = iterationPaths[index];
+		image[iterationPath.pixelIndex] += iterationPath.color;
+	}
+}
 
-    if (index < nPaths)
+struct end_condition { // predicate: true while a path still has bounces left
+    __host__ __device__
+        bool operator()(const PathSegment& pathSegment) {
+        return (pathSegment.remainingBounces > 0); // the shading kernels mark finished paths with 0, so `>= 0` matched every path
+    }
+};
+struct compare_materials { // comparator: true when m1 has the larger materialId
+    __host__ __device__
+        bool operator()(const ShadeableIntersection& m1, const ShadeableIntersection& m2) {
+        return m2.materialId < m1.materialId;
+    }
+};
+// Shading kernel with direct lighting: ordinary bounces, except that at 2 remaining bounces the scattered ray is aimed at a random point on one of the `num` entries of `lights`.
+__global__ void shadeDirectLighting(int iter, int num_paths, ShadeableIntersection* shadeableIntersections, PathSegment* pathSegments, Material* materials, Geom* lights, int num
+){
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx < num_paths)
     {
-        PathSegment iterationPath = iterationPaths[index];
-        image[iterationPath.pixelIndex] += iterationPath.color;
+        if (pathSegments[idx].remainingBounces <= 0) {
+            return;
+        }
+
+        ShadeableIntersection intersection = shadeableIntersections[idx];
+        thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, pathSegments[idx].remainingBounces);
+        PathSegment& path = pathSegments[idx]; // reference, not a copy: the light-directed bounce below has to be stored back
+
+        if (path.remainingBounces != 2 && path.remainingBounces > 0 && intersection.t > 0.f) {
+
+            // Regular bounce: every depth except the one that is redirected at a light.
+
+            Material material = materials[intersection.materialId];
+            glm::vec3 materialColor = material.color;
+
+            // If the material indicates that the object was a light, "light" the ray
+            if (material.emittance > 0.0f) {
+                pathSegments[idx].color *= (materialColor * material.emittance);
+                pathSegments[idx].remainingBounces = 0;
+            }
+            else if (pathSegments[idx].remainingBounces == 1) {
+                pathSegments[idx].remainingBounces -= 1;
+                pathSegments[idx].color = glm::vec3(0.0f);
+            }
+            else {
+                pathSegments[idx].remainingBounces -= 1;
+                scatterRay(pathSegments[idx], pathSegments[idx].ray.origin + pathSegments[idx].ray.direction * intersection.t, intersection.surfaceNormal,
+                    material, rng);
+            }
+            
+        }
+        else if (path.remainingBounces == 2 && intersection.t > 0.f) {
+            Material material = materials[intersection.materialId];
+            glm::vec3 materialColor = material.color;
+
+            // If the material indicates that the object was a light, "light" the ray
+            if (material.emittance > 0.0f) {
+                pathSegments[idx].color *= (materialColor * material.emittance);
+                pathSegments[idx].remainingBounces = 0;
+            }
+            else {
+                scatterRay(path, path.ray.origin + path.ray.direction * intersection.t, intersection.surfaceNormal, material, rng);
+                thrust::uniform_real_distribution<float> u01(0, 1);
+                float r = u01(rng);
+                // With no lights, keep the scattered direction instead of reading lights[0].
+                if (num > 0) {
+                    int lightIdx = glm::min((int)glm::floor(r * num), num - 1);
+                    glm::vec3 lightPt = pointOnPlane(lights[lightIdx], rng);
+                    path.ray.direction = glm::normalize(lightPt - path.ray.origin);
+                }
+                path.remainingBounces--;
+            }
+        }
+        else {
+            pathSegments[idx].color = glm::vec3(0.0f);
+            pathSegments[idx].remainingBounces = 0;
+        }
     }
 }
-
 /**
  * Wrapper for the __global__ call that sets up the kernel calls and does a ton
  * of memory management
@@ -286,7 +389,7 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
     const Camera &cam = hst_scene->state.camera;
     const int pixelcount = cam.resolution.x * cam.resolution.y;
 
-    // 2D block for generating ray from camera
+	// 2D block for generating ray from camera
     const dim3 blockSize2d(8, 8);
     const dim3 blocksPerGrid2d(
             (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
@@ -326,59 +429,79 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 
     // TODO: perform one iteration of path tracing
 
-    generateRayFromCamera <<<blocksPerGrid2d, blockSize2d >>>(cam, iter, traceDepth, dev_paths);
-    checkCUDAError("generate camera ray");
 
-    int depth = 0;
-    PathSegment* dev_path_end = dev_paths + pixelcount;
-    int num_paths = dev_path_end - dev_paths;
 
-    // --- PathSegment Tracing Stage ---
-    // Shoot ray into scene, bounce between objects, push shading chunks
+	generateRayFromCamera <<<blocksPerGrid2d, blockSize2d >>>(cam, iter, traceDepth, dev_paths);
+	checkCUDAError("generate camera ray");
+
+	int depth = 0;
+	PathSegment* dev_path_end = dev_paths + pixelcount;
+	int num_paths = dev_path_end - dev_paths;
+
+	// --- PathSegment Tracing Stage ---
+	// Shoot ray into scene, bounce between objects, push shading chunks
 
   bool iterationComplete = false;
-    while (!iterationComplete) {
+	while (!iterationComplete) {
 
     // clean shading chunks
     cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
-
     // tracing
-    dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
-    computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
-        depth
-        , num_paths
-        , dev_paths
-        , dev_geoms
-        , hst_scene->geoms.size()
-        , dev_intersections
-        );
-    checkCUDAError("trace one bounce");
-    cudaDeviceSynchronize();
+    dim3 numBlocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
+    
+    // use cached first intersection
+    if (CACHE_BOUNCE && !ANTI_ALIASING && depth == 0 && iter != 1) {
+        thrust::copy(thrust::device, dev_intersection_first_bounce, dev_intersection_first_bounce + num_paths, dev_intersections);
+
+        //sort by material
+        if (SORT_MATERIALS) {
+            thrust::sort_by_key(thrust::device, dev_intersections, dev_intersections + num_paths, dev_paths, compare_materials());
+        }
+    }else {
+        // clean shading chunks
+        cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
+        computeIntersections <<<numBlocksPathSegmentTracing, blockSize1d>>> (depth, num_paths, dev_paths, dev_geoms, hst_scene->geoms.size(), dev_intersections);
+        checkCUDAError("trace one bounce");
+        cudaDeviceSynchronize();
+
+        //cache first bounce
+        if (CACHE_BOUNCE && !ANTI_ALIASING && depth == 0 && iter == 1) {
+            thrust::copy(thrust::device, dev_intersections, dev_intersections + num_paths, dev_intersection_first_bounce);
+        }
+        //sort by material
+        if (SORT_MATERIALS) {
+            thrust::sort_by_key(thrust::device, dev_intersections, dev_intersections + num_paths, dev_paths, compare_materials());
+        }
+    }
     depth++;
 
-
-    // TODO:
-    // --- Shading Stage ---
-    // Shade path segments based on intersections and generate new rays by
+#if DIRECT_LIGHTING
+    shadeDirectLighting<<<numBlocksPathSegmentTracing, blockSize1d>>>(iter, num_paths, dev_intersections, dev_paths, 
+                                                                            dev_materials, dev_lights, hst_scene->lights.size());
+#else
+    shadeFakeMaterial<<<numBlocksPathSegmentTracing, blockSize1d>>> (iter, num_paths, dev_intersections, dev_paths, dev_materials);
+#endif
+    dev_path_end = thrust::stable_partition(thrust::device, dev_paths, dev_path_end, end_condition());
+    num_paths = dev_path_end - dev_paths;
+    if (num_paths == 0 || depth > traceDepth) {
+        iterationComplete = true;
+    }
+	// TODO:
+	// --- Shading Stage ---
+	// Shade path segments based on intersections and generate new rays by
   // evaluating the BSDF.
   // Start off with just a big kernel that handles all the different
   // materials you have in the scenefile.
   // TODO: compare between directly shading the path segments and shading
   // path segments that have been reshuffled to be contiguous in memory.
+// shade for direct lighting
 
-  shadeFakeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>> (
-    iter,
-    num_paths,
-    dev_intersections,
-    dev_paths,
-    dev_materials
-  );
-  iterationComplete = true; // TODO: should be based off stream compaction results.
-    }
+  // iterationComplete is set earlier in this loop body, once stream compaction leaves no paths (num_paths == 0) or depth exceeds traceDepth.
+	}
 
   // Assemble this iteration and apply it to the image
   dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
-    finalGather<<<numBlocksPixels, blockSize1d>>>(num_paths, dev_image, dev_paths);
+	finalGather<<<numBlocksPixels, blockSize1d>>>(num_paths, dev_image, dev_paths);
 
     ///////////////////////////////////////////////////////////////////////////
 
@@ -391,3 +514,6 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
 
     checkCUDAError("pathtrace");
 }
+
+
+
diff --git a/src/scene.cpp b/src/scene.cpp
index 3fb6239a..cbae043c 100644
--- a/src/scene.cpp
+++ b/src/scene.cpp
@@ -135,9 +135,9 @@ int Scene::loadCamera() {
     float fovx = (atan(xscaled) * 180) / PI;
     camera.fov = glm::vec2(fovx, fovy);
 
-    camera.right = glm::normalize(glm::cross(camera.view, camera.up));
-    camera.pixelLength = glm::vec2(2 * xscaled / (float)camera.resolution.x,
-                                   2 * yscaled / (float)camera.resolution.y);
+	camera.right = glm::normalize(glm::cross(camera.view, camera.up));
+	camera.pixelLength = glm::vec2(2 * xscaled / (float)camera.resolution.x
+							, 2 * yscaled / (float)camera.resolution.y);
 
     camera.view = glm::normalize(camera.lookAt - camera.position);
 
diff --git a/src/scene.h b/src/scene.h
index f29a9171..9acaf2ed 100644
--- a/src/scene.h
+++ b/src/scene.h
@@ -22,5 +22,6 @@ class Scene {
 
     std::vector<Geom> geoms;
     std::vector<Material> materials;
+    std::vector<Geom> lights;
     RenderState state;
 };
diff --git a/src/sceneStructs.h b/src/sceneStructs.h
index da4dbf30..97b2613a 100644
--- a/src/sceneStructs.h
+++ b/src/sceneStructs.h
@@ -10,6 +10,7 @@
 enum GeomType {
     SPHERE,
     CUBE,
+    TRIANGLE,  // NOTE(review): presumably tags a Geom whose shape data is its `triangle` member -- confirm in the intersection code
 };
 
 struct Ray {
@@ -17,6 +18,18 @@ struct Ray {
     glm::vec3 direction;
 };
 
+struct Point {  // Per-point record; Triangle stores three of these (pt1..pt3).
+    glm::vec3 pos;  // presumably the point's position -- TODO confirm against the code that fills it
+    glm::vec3 nor;  // presumably the normal at this point -- TODO confirm
+    glm::vec2 uv;  // presumably texture coordinates -- TODO confirm
+};
+
+struct Triangle {  // Three Point records held by value; Geom embeds one as its `triangle` member.
+    Point pt1;  // first point
+    Point pt2;  // second point
+    Point pt3;  // third point
+};
+
 struct Geom {
     enum GeomType type;
     int materialid;
@@ -26,6 +39,7 @@ struct Geom {
     glm::mat4 transform;
     glm::mat4 inverseTransform;
     glm::mat4 invTranspose;
+    Triangle triangle;
 };
 
 struct Material {
@@ -60,10 +74,10 @@ struct RenderState {
 };
 
 struct PathSegment {
-    Ray ray;
-    glm::vec3 color;
-    int pixelIndex;
-    int remainingBounces;
+	Ray ray;  // current ray; shading derives the hit point as ray.origin + ray.direction * t
+	glm::vec3 color;  // running path colour: multiplied by (material color * emittance) on a light hit, set to 0 when the path is discarded
+	int pixelIndex;  // presumably the image pixel this path contributes to -- TODO confirm in finalGather
+	int remainingBounces;  // bounce budget: decremented when the path scatters, set to 0 to terminate the path
 };
 
 // Use with a corresponding PathSegment to do: