diff --git a/src/plugin.js b/src/plugin.js index 71d2030b..156a1910 100644 --- a/src/plugin.js +++ b/src/plugin.js @@ -157,40 +157,59 @@ async function parse(parser, source, opts) { return new Promise((resolve, reject) => { const socket = new net.Socket(); - let chunks = ""; + let buffer = Buffer.alloc(0); + let expectedLength = null; socket.on("error", (error) => { reject(error); }); socket.on("data", (data) => { - chunks += data.toString("utf-8"); - }); + buffer = Buffer.concat([buffer, data]); - socket.on("end", () => { - const response = JSON.parse(chunks); + // If we haven't read the length header yet + if (expectedLength === null && buffer.length >= 4) { + expectedLength = buffer.readUInt32BE(0); + buffer = buffer.subarray(4); + } - if (response.error) { - const error = new Error(response.error); - if (response.loc) { - error.loc = response.loc; - } + // If we have the complete message + if (expectedLength !== null && buffer.length >= expectedLength) { + const response = JSON.parse( + buffer.toString("utf-8", 0, expectedLength) + ); - reject(error); + if (response.error) { + const error = new Error(response.error); + if (response.loc) { + error.loc = response.loc; + } + reject(error); + } else { + resolve(response); + } } + }); - resolve(response); + socket.on("end", () => { + if (expectedLength === null || buffer.length < expectedLength) { + reject(new Error("Socket closed before receiving complete response")); + } }); socket.connect(connectionOptions, () => { - socket.end( - JSON.stringify({ - parser, - source, - maxwidth: opts.printWidth, - tabwidth: opts.tabWidth - }) - ); + const content = JSON.stringify({ + parser, + source, + maxwidth: opts.printWidth, + tabwidth: opts.tabWidth + }); + const contentBuffer = Buffer.from(content, "utf-8"); + const lengthBuffer = Buffer.allocUnsafe(4); + lengthBuffer.writeUInt32BE(contentBuffer.length, 0); + + socket.write(lengthBuffer); + socket.end(contentBuffer); }); }); } diff --git a/src/server.rb b/src/server.rb 
index d5876cf7..459ac66e 100644 --- a/src/server.rb +++ b/src/server.rb @@ -71,7 +71,13 @@ # Start up a new thread that will handle each successive connection. Thread.new(server.accept_nonblock) do |socket| - request = JSON.parse(socket.read.force_encoding("UTF-8")) + # Read the length header (4 bytes) + length_bytes = socket.read(4) + expected_length = length_bytes.unpack1("N") + + # Read the content based on the expected length + content = socket.read(expected_length) + request = JSON.parse(content.force_encoding("UTF-8")) source = request["source"] source.each_line do |line| @@ -136,16 +142,28 @@ end if response - socket.write(JSON.fast_generate(response.force_encoding("UTF-8"))) + content = JSON.fast_generate(response.force_encoding("UTF-8")) + content_bytes = content.bytesize + socket.write([content_bytes].pack("N")) + socket.write(content) else - socket.write("{ \"error\": true }") + content = "{ \"error\": true }" + content_bytes = content.bytesize + socket.write([content_bytes].pack("N")) + socket.write(content) end rescue SyntaxTree::Parser::ParseError => error loc = { start: { line: error.lineno, column: error.column } } - socket.write(JSON.fast_generate(error: error.message, loc: loc)) + content = JSON.fast_generate(error: error.message, loc: loc) + content_bytes = content.bytesize + socket.write([content_bytes].pack("N")) + socket.write(content) rescue StandardError => error begin - socket.write(JSON.fast_generate(error: error.message)) + content = JSON.fast_generate(error: error.message) + content_bytes = content.bytesize + socket.write([content_bytes].pack("N")) + socket.write(content) rescue Errno::EPIPE # Do nothing, the pipe has been closed by the parent process so we # don't actually care about writing to it anymore. 
// Regression tests for multi-byte UTF-8 characters that land on or near a
// transport chunk boundary. The suite name assumes that boundary is 8 KiB.
describe("UTF-8 handling at 8KB boundary", () => {
  const BOUNDARY = 8192;
  const ROCKET = "🚀";
  // Derived rather than hard-coded so the sweep below stays correct if the
  // probe character is ever swapped for one of a different encoded width.
  const ROCKET_BYTES = Buffer.byteLength(ROCKET, "utf-8");

  // Builds a Ruby snippet: a comment line holding `padding` one-byte "a"
  // characters, followed by a string literal that begins with the emoji.
  const sourceWithPadding = (padding) =>
    `# ${"a".repeat(padding)}\nputs "${ROCKET} test"`;

  test("should handle emoji at 8KB boundary without corruption", () => {
    // Layout of the raw source: "# " (2 bytes) + 8190 padding bytes puts the
    // newline at byte offset 8192 and, after `puts "` (6 bytes), the emoji at
    // offset 8199. The emoji is therefore just past the 8192-byte mark in the
    // source itself; where it lands in the transported bytes additionally
    // depends on framing overhead, which the sweep further down accounts for.
    const testCode = sourceWithPadding(8190);

    // toMatchFormat() is called without an expected value; presumably it
    // asserts the formatted output equals the input — verify against the
    // matcher's definition.
    return expect(testCode).toMatchFormat();
  });

  test("should handle multiple emojis around 8KB boundary", () => {
    // "# " + 8180 padding bytes + "\n" + "# " occupy offsets 0-8184, so the
    // four 4-byte emojis occupy offsets 8185-8200 of the raw source; the
    // second one spans offsets 8189-8192 and thus crosses the 8192-byte mark.
    const beforeBoundary = "a".repeat(8180);
    const testCode = `# ${beforeBoundary}\n# 🎨🎭🎪🎯\nputs "test"`;

    return expect(testCode).toMatchFormat();
  });

  describe("emoji swept across the boundary", () => {
    // Upper bound on the number of bytes that sit between the start of the
    // transported stream and the emoji, excluding the padding itself (length
    // header, JSON quoting and escaping, the fixed Ruby text around the
    // padding). NOTE(review): 64 is a generous guess — confirm against the
    // actual wire format.
    const MAX_OVERHEAD = 64;

    // An N-byte character is split by a boundary for exactly N - 1 starting
    // offsets, so stepping by N - 1 hits one of them for every possible
    // overhead in [0, MAX_OVERHEAD] without testing each padding individually.
    const step = ROCKET_BYTES - 1;
    const firstPadding = BOUNDARY - step - MAX_OVERHEAD;
    const lastPadding = BOUNDARY - 1;

    for (let padding = firstPadding; padding <= lastPadding; padding += step) {
      test(`should keep the emoji intact with ${padding} bytes of padding`, () =>
        expect(sourceWithPadding(padding)).toMatchFormat());
    }
  });
});