From f2e98c3bf356abf8068329b9d8d88ce92d7e083d Mon Sep 17 00:00:00 2001
From: childsb
Date: Mon, 18 Nov 2013 14:22:06 -0600
Subject: [PATCH 1/5] Set the default buffer size (if not specified) to 128k

---
 .../java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java b/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
index eda52014..2106c117 100644
--- a/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
+++ b/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
@@ -85,6 +85,11 @@ public void setConf(Configuration conf){
         Path workingDirectory = getInitialWorkingDirectory();
         mkdirs(workingDirectory);
 
+        String buffy = conf.get("io.file.buffer.size", null);
+        if(buffy==null || "".compareTo(buffy)==0){
+            conf.set("io.file.buffer.size", Integer.toString(1024 * 128));
+        }
+
         //volName=conf.get("fs.glusterfs.volname", null);
         //remoteGFSServer=conf.get("fs.glusterfs.server", null);
 

From 953b1ece24b42dd218fdce259c6ad1203ea2f7a9 Mon Sep 17 00:00:00 2001
From: childsb
Date: Mon, 18 Nov 2013 14:49:32 -0600
Subject: [PATCH 2/5] Added unit test, added logging of default buffer size when set.

---
 .../hadoop/fs/glusterfs/GlusterVolume.java |  1 +
 .../fs/test/unit/HcfsFileSystemTest.java   | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java b/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
index 2106c117..c18bfad4 100644
--- a/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
+++ b/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
@@ -88,6 +88,7 @@ public void setConf(Configuration conf){
         String buffy = conf.get("io.file.buffer.size", null);
         if(buffy==null || "".compareTo(buffy)==0){
             conf.set("io.file.buffer.size", Integer.toString(1024 * 128));
+            log.info("Set default buffer size:" + conf.get("io.file.buffer.size"));
         }
 
         //volName=conf.get("fs.glusterfs.volname", null);

diff --git a/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java b/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java
index 44210f6d..f2eae2d0 100644
--- a/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java
+++ b/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java
@@ -65,6 +65,28 @@ public static void after() throws IOException{
 
     }
 
+    @Test
+    public void test() throws Exception {
+        Path out = new Path("a");
+
+        FSDataOutputStream os = fs.create(out);
+
+        int written=0;
+        /**
+         * Assert that writes smaller than 10KB are NOT spilled to disk
+         */
+        while(written<10000){
+            os.write("ASDF".getBytes());
+            written+="ASDF".getBytes().length;
+            //now, we expect
+            Assert.assertTrue("asserting that file not written yet",fs.getLength(out)==0);
+        }
+        os.flush();
+        Assert.assertTrue("asserting that file not written yet",fs.getLength(out)>=10000);
+
+        os.close();
+    }
+
     @org.junit.Test
     public void testTolerantMkdirs() throws Exception{
         Path longPath=new Path("a/b/c/d");
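
Taken together, patches 1 and 2 only fill in io.file.buffer.size when the job configuration leaves the key missing or empty; an explicitly configured value is left untouched at this stage. The following standalone sketch restates that guard outside of GlusterVolume (the applyDefault helper and the example values are illustrative only, not part of the patch):

    import org.apache.hadoop.conf.Configuration;

    public class BufferSizeDefaulting {

        // Mirrors the check added to GlusterVolume.setConf(): replace only a
        // missing or empty io.file.buffer.size with 128k (131072 bytes).
        static void applyDefault(Configuration conf) {
            String buffy = conf.get("io.file.buffer.size", null);
            if (buffy == null || "".equals(buffy)) {
                conf.set("io.file.buffer.size", Integer.toString(1024 * 128));
            }
        }

        public static void main(String[] args) {
            Configuration unset = new Configuration(false);             // key absent
            applyDefault(unset);
            System.out.println(unset.get("io.file.buffer.size"));       // 131072

            Configuration explicit = new Configuration(false);
            explicit.set("io.file.buffer.size", "65536");                // user-supplied value
            applyDefault(explicit);
            System.out.println(explicit.get("io.file.buffer.size"));     // stays 65536
        }
    }

The test added in patch 2 exercises the same property from the write path: as long as fewer bytes than the buffer size have been written, nothing should be visible on disk until flush() is called.
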
From 7355a252361e5ded972d12358023764d6d4d6f9c Mon Sep 17 00:00:00 2001
From: childsb
Date: Mon, 18 Nov 2013 16:33:40 -0600
Subject: [PATCH 3/5] cleanup buffer spill test

---
 .../hadoop/fs/test/unit/HcfsFileSystemTest.java | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java b/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java
index f2eae2d0..abd7e67f 100644
--- a/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java
+++ b/src/test/java/org/apache/hadoop/fs/test/unit/HcfsFileSystemTest.java
@@ -25,6 +25,7 @@
 
 package org.apache.hadoop.fs.test.unit;
 
+import static org.apache.hadoop.fs.FileSystemTestHelper.getTestRootPath;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -40,6 +41,7 @@
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.test.connector.HcfsTestConnectorFactory;
 import org.apache.hadoop.fs.test.connector.HcfsTestConnectorInterface;
+import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -62,11 +64,15 @@ public static void setup() throws Exception {
     @AfterClass
     public static void after() throws IOException{
         fs.close();
-
     }
 
+    @After
+    public void tearDown() throws Exception {
+        fs.delete(getTestRootPath(fs, "test"),true);
+    }
+
     @Test
-    public void test() throws Exception {
+    public void testBufferSpill() throws Exception {
         Path out = new Path("a");
 
         FSDataOutputStream os = fs.create(out);
@@ -85,6 +91,7 @@ public void test() throws Exception {
         Assert.assertTrue("asserting that file not written yet",fs.getLength(out)>=10000);
 
         os.close();
+        fs.delete(out);
     }
 
     @org.junit.Test

From 8f909af23237fb5e898a526f65899fd990453b92 Mon Sep 17 00:00:00 2001
From: jayunit100
Date: Tue, 19 Nov 2013 07:47:48 -0500
Subject: [PATCH 4/5] Revised unit test to simply read from io.file.buffer.size

---
 .../fs/test/unit/HCFSPerformanceIOTests.java | 75 +++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 src/test/java/org/apache/hadoop/fs/test/unit/HCFSPerformanceIOTests.java

diff --git a/src/test/java/org/apache/hadoop/fs/test/unit/HCFSPerformanceIOTests.java b/src/test/java/org/apache/hadoop/fs/test/unit/HCFSPerformanceIOTests.java
new file mode 100644
index 00000000..ef66877d
--- /dev/null
+++ b/src/test/java/org/apache/hadoop/fs/test/unit/HCFSPerformanceIOTests.java
@@ -0,0 +1,75 @@
+package org.apache.hadoop.fs.test.unit;
+
+import static org.apache.hadoop.fs.FileSystemTestHelper.getTestRootPath;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.test.connector.HcfsTestConnectorFactory;
+import org.apache.hadoop.fs.test.connector.HcfsTestConnectorInterface;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * A class for performance and IO related unit tests:
+ *
+ * - Write buffering
+ * - Read buffering
+ * - Object caching / File lookup caching.
+ * - Seeking
+ */
+public class HCFSPerformanceIOTests {
+
+    static FileSystem fs ;
+
+    @BeforeClass
+    public static void setup() throws Exception {
+        HcfsTestConnectorInterface connector = HcfsTestConnectorFactory.getHcfsTestConnector();
+        fs= connector.create();
+    }
+
+    @AfterClass
+    public static void after() throws IOException{
+        fs.close();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        fs.delete(getTestRootPath(fs, "test"),true);
+    }
+
+    @Test
+    public void testBufferSpill() throws Exception {
+        Path out = new Path("a");
+        final Integer buffersize = fs.getConf().getInt("io.file.buffer.size", -1);
+        FSDataOutputStream os = fs.create(out);
+
+        int written=0;
+        /**
+         * Assert that writes smaller than 10KB are NOT spilled to disk
+         */
+        while(written<buffersize){
+            os.write("ASDF".getBytes());
+            written+="ASDF".getBytes().length;
+            Assert.assertTrue("asserting that file not written yet",fs.getLength(out)==0);
+        }
+        os.flush();
+        Assert.assertTrue("asserting that file not written yet",fs.getLength(out)>=buffersize);
+
+        os.close();
+        fs.delete(out);
+    }
+}
\ No newline at end of file
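
Patch 4 re-bases the assertion on whatever io.file.buffer.size resolves to instead of a hard-coded 10KB. The behaviour the test relies on is ordinary output buffering: GlusterVolume extends RawLocalFileSystem, which (in the Hadoop releases this plugin targets) wraps the local file stream in a BufferedOutputStream of the configured size, so nothing reaches the file until the buffer fills or the stream is flushed. A plain java.io sketch of that assumption, outside Hadoop entirely:

    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;

    public class SpillSketch {
        public static void main(String[] args) throws Exception {
            File f = File.createTempFile("spill", ".dat");
            int bufferSize = 128 * 1024;                 // the 128k value the patches converge on
            BufferedOutputStream out =
                    new BufferedOutputStream(new FileOutputStream(f), bufferSize);

            out.write(new byte[64 * 1024]);              // below the buffer size...
            System.out.println(f.length());              // ...so nothing is on disk yet: 0

            out.write(new byte[128 * 1024]);             // a write of at least the buffer size spills through
            System.out.println(f.length() > 0);          // true

            out.close();                                 // close() flushes anything still buffered
            System.out.println(f.length());              // 196608 bytes = 64k + 128k
            f.delete();
        }
    }
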
From 293e784a3d7980208097bb978c618fa72e2556f8 Mon Sep 17 00:00:00 2001
From: jayunit100
Date: Tue, 19 Nov 2013 12:40:16 -0500
Subject: [PATCH 5/5] Update to buffering: Don't allow 4096 byte write buffers,
 encode an OPTIMAL constant that is used

Tracker item 812924
---
 .../hadoop/fs/glusterfs/GlusterVolume.java     | 34 ++++++----
 .../GlusterFileSystemTestConnector.java        |  6 +-
 .../fs/test/unit/HCFSPerformanceIOTests.java   | 63 ++++++++++++-------
 3 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java b/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
index c18bfad4..f550a2ec 100644
--- a/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
+++ b/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterVolume.java
@@ -38,8 +38,16 @@
 
 public class GlusterVolume extends RawLocalFileSystem{
 
+    static final Logger log = LoggerFactory.getLogger(GlusterVolume.class);
 
-    static final Logger log = LoggerFactory.getLogger(GlusterFileSystemCRC.class);
+    /**
+     * General reason for these constants is to help us decide
+     * when to override the specified buffer size. See implementation
+     * of logic below, which might change over time.
+     */
+    public static final int OVERRIDE_WRITE_BUFFER_SIZE = 1024 * 4;
+    public static final int OPTIMAL_WRITE_BUFFER_SIZE = 1024 * 128;
+
     public static final URI NAME = URI.create("glusterfs:///");
 
     protected String root=null;
@@ -81,20 +89,22 @@ public void setConf(Configuration conf){
             mkdirs(mapredSysDirectory);
         }
 
-        /* ensure the initial working directory exists */
+        /**
+         * Ensure the initial working directory exists
+         **/
         Path workingDirectory = getInitialWorkingDirectory();
         mkdirs(workingDirectory);
-
-        String buffy = conf.get("io.file.buffer.size", null);
-        if(buffy==null || "".compareTo(buffy)==0){
-            conf.set("io.file.buffer.size", Integer.toString(1024 * 128));
-            log.info("Set default buffer size:" + conf.get("io.file.buffer.size"));
+
+        /**
+         * Write Buffering
+         */
+        Integer userBufferSize=conf.getInt("io.file.buffer.size", -1);
+        if(userBufferSize == OVERRIDE_WRITE_BUFFER_SIZE || userBufferSize == -1) {
+            conf.setInt("io.file.buffer.size", OPTIMAL_WRITE_BUFFER_SIZE);
         }
-
-        //volName=conf.get("fs.glusterfs.volname", null);
-        //remoteGFSServer=conf.get("fs.glusterfs.server", null);
-
-    }catch (Exception e){
+        log.info("Write buffer size : " +conf.getInt("io.file.buffer.size",-1)) ;
+    }
+    catch (Exception e){
         throw new RuntimeException(e);
     }
 }
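
The new rule in setConf() reads most clearly as a small decision function: Hadoop's stock 4096-byte default (OVERRIDE_WRITE_BUFFER_SIZE) and a missing key are both promoted to the 128k OPTIMAL_WRITE_BUFFER_SIZE, while any other explicit choice is respected. Unlike patches 1 and 2, a deliberately configured 4096 can no longer survive, which is what the subject line asks for. A hedged restatement of the rule (the helper name and local constants below are illustrative; the real logic lives in setConf()):

    public class WriteBufferRule {

        static final int OVERRIDE_WRITE_BUFFER_SIZE = 1024 * 4;    // mirrors GlusterVolume
        static final int OPTIMAL_WRITE_BUFFER_SIZE  = 1024 * 128;  // mirrors GlusterVolume

        // Same decision setConf() now makes in place on the Configuration.
        static int effectiveBufferSize(int configured) {
            if (configured == OVERRIDE_WRITE_BUFFER_SIZE || configured == -1) {
                return OPTIMAL_WRITE_BUFFER_SIZE;
            }
            return configured;
        }

        public static void main(String[] args) {
            System.out.println(effectiveBufferSize(-1));     // 131072: key absent
            System.out.println(effectiveBufferSize(4096));   // 131072: stock default, overridden
            System.out.println(effectiveBufferSize(65536));  // 65536: explicit user value kept
        }
    }
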
diff --git a/src/test/java/org/apache/hadoop/fs/test/connector/glusterfs/GlusterFileSystemTestConnector.java b/src/test/java/org/apache/hadoop/fs/test/connector/glusterfs/GlusterFileSystemTestConnector.java
index 77dafb1e..82cd00ee 100644
--- a/src/test/java/org/apache/hadoop/fs/test/connector/glusterfs/GlusterFileSystemTestConnector.java
+++ b/src/test/java/org/apache/hadoop/fs/test/connector/glusterfs/GlusterFileSystemTestConnector.java
@@ -1,18 +1,20 @@
 package org.apache.hadoop.fs.test.connector.glusterfs;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.glusterfs.GlusterVolume;
 import org.apache.hadoop.fs.test.connector.HcfsTestConnector;
 
 /**
  * A HCFS test connector specifically for instantiation and testing Glusterfs.
  */
 public class GlusterFileSystemTestConnector extends HcfsTestConnector{
-
-    public Configuration createConfiguration(){
+
+    public Configuration createConfiguration(){
         Configuration c = super.createConfiguration();
         c.set("fs.glusterfs.mount",System.getProperty("GLUSTER_MOUNT"));
         c.set("fs.glusterfs.impl","org.apache.hadoop.fs.local.GlusterFs");
         c.set("fs.default.name","glusterfs:///");
+        c.setInt("io.file.buffer.size",GlusterVolume.OVERRIDE_WRITE_BUFFER_SIZE );
         return c;
     }
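
Pinning io.file.buffer.size to OVERRIDE_WRITE_BUFFER_SIZE in the test connector is what lets the unit test below observe the override end to end: the connector hands the filesystem the 4k trigger value, setConf() promotes it, and the resulting FileSystem reports 128k. A standalone version of that round trip (an illustrative test class, separate from the one in the patch):

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.glusterfs.GlusterVolume;
    import org.apache.hadoop.fs.test.connector.HcfsTestConnectorFactory;
    import org.apache.hadoop.fs.test.connector.HcfsTestConnectorInterface;
    import org.junit.Assert;
    import org.junit.Test;

    public class BufferOverrideSmokeTest {

        @Test
        public void overrideIsApplied() throws Exception {
            // The connector configures the 4k "override trigger" value...
            HcfsTestConnectorInterface connector = HcfsTestConnectorFactory.getHcfsTestConnector();
            FileSystem fs = connector.create();
            // ...so the FileSystem handed back should already report the promoted 128k size.
            Assert.assertEquals(GlusterVolume.OPTIMAL_WRITE_BUFFER_SIZE,
                    fs.getConf().getInt("io.file.buffer.size", -1));
            fs.close();
        }
    }
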
+ */ + Assert.assertEquals( + GlusterVolume.OPTIMAL_WRITE_BUFFER_SIZE, + fs.getConf().getInt("io.file.buffer.size",-1)); + + FSDataOutputStream os = fs.create(bufferoutpath()); int written=0; + /** - * Assert that writes smaller than 10KB are NOT spilled to disk + * Now, we assert that no data is spilled to disk until we reach the optimal size. */ - while(written=buffersize); + + Assert.assertTrue("asserting that is now written... ",fs.getLength(bufferoutpath()) >= GlusterVolume.OPTIMAL_WRITE_BUFFER_SIZE); os.close(); - fs.delete(out); } } \ No newline at end of file