From d7bae87f073c2a9e6f14d6af26a9c19904dfdee0 Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 21:15:45 +0200 Subject: [PATCH 1/7] Migrate project to use Magnus + rb_sys --- Cargo.lock | 75 ------- Gemfile | 1 - Rakefile | 26 ++- ext/Makefile | 6 - ext/Rakefile | 2 - ext/extconf.rb | 5 - ext/rscsv/Cargo.lock | 343 +++++++++++++++++++++++++++++ Cargo.toml => ext/rscsv/Cargo.toml | 7 +- ext/rscsv/extconf.rb | 5 + ext/rscsv/src/lib.rs | 146 ++++++++++++ lib/rscsv.rb | 6 +- lib/tasks/helix_runtime.rake | 5 - rscsv.gemspec | 35 +-- src/lib.rs | 206 ----------------- 14 files changed, 541 insertions(+), 327 deletions(-) delete mode 100644 Cargo.lock delete mode 100644 ext/Makefile delete mode 100644 ext/Rakefile delete mode 100644 ext/extconf.rb create mode 100644 ext/rscsv/Cargo.lock rename Cargo.toml => ext/rscsv/Cargo.toml (63%) create mode 100644 ext/rscsv/extconf.rb create mode 100644 ext/rscsv/src/lib.rs delete mode 100644 lib/tasks/helix_runtime.rake delete mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 9896af7..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,75 +0,0 @@ -[[package]] -name = "cstr-macro" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "csv" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "csv-core" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "helix" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cstr-macro 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", - "libcruby-sys 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "libc" -version = "0.2.51" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "libcruby-sys" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "memchr" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rscsv" -version = "0.5.0" -dependencies = [ - "csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", - "helix 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "serde" -version = "1.0.89" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum cstr-macro 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db53fddba18cdd35477a7213a3ef6acfbfa333c31b42ce019e544c4a1420a06f" -"checksum csv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9fd1c44c58078cfbeaf11fbb3eac9ae5534c23004ed770cc4bfb48e658ae4f04" -"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65" -"checksum helix 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "49a017e3e798ad9386e0a0584e66fd6c04a80ccc1242eb8f689c62ce6f408240" -"checksum libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)" = "bedcc7a809076656486ffe045abeeac163da1b558e963a31e29fbfbeba916917" -"checksum libcruby-sys 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fef6028cdce0c8d55676fd1d66bb810facef8cade0dd71d28511d375e84da4c0" -"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" -"checksum serde 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)" = "92514fb95f900c9b5126e32d020f5c6d40564c27a5ea6d1d7d9f157a96623560" diff --git a/Gemfile b/Gemfile index b399d93..fa75df1 100644 --- a/Gemfile +++ b/Gemfile @@ -1,4 +1,3 @@ source 'https://rubygems.org' -# Specify your gem's dependencies in rscsv.gemspec gemspec diff --git a/Rakefile b/Rakefile index 9b5918d..00c80c6 100644 --- a/Rakefile +++ b/Rakefile @@ -1,9 +1,27 @@ require 'bundler/gem_tasks' -require 'bundler/setup' require 'rspec/core/rake_task' -import 'lib/tasks/helix_runtime.rake' +require 'rake/extensiontask' +require 'rb_sys' RSpec::Core::RakeTask.new(:spec) -task :spec => :build -task :default => :spec +GEMSPEC = Gem::Specification.load('rscsv.gemspec') + +Rake::ExtensionTask.new('rscsv', GEMSPEC) do |ext| + ext.lib_dir = 'lib/rscsv' + ext.source_pattern = '*.{rs,toml}' + ext.cross_compile = true + ext.cross_platform = %w[ + x86_64-linux + x86_64-linux-musl + aarch64-linux + aarch64-linux-musl + x86_64-darwin + arm64-darwin + x64-mingw-ucrt + x64-mingw32 + ] +end + +task spec: :compile +task default: [:compile, :spec] diff --git a/ext/Makefile b/ext/Makefile deleted file mode 100644 index 8d6e83f..0000000 --- a/ext/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -all: - cd .. && cargo rustc --release -- -C link-args=-Wl,-undefined,dynamic_lookup -clean: - rm -rf ../target - -install: ; diff --git a/ext/Rakefile b/ext/Rakefile deleted file mode 100644 index 27793ad..0000000 --- a/ext/Rakefile +++ /dev/null @@ -1,2 +0,0 @@ -Dir.chdir '..' -import 'lib/tasks/helix_runtime.rake' diff --git a/ext/extconf.rb b/ext/extconf.rb deleted file mode 100644 index a01d24b..0000000 --- a/ext/extconf.rb +++ /dev/null @@ -1,5 +0,0 @@ -if !system('cargo --version') || !system('rustc --version') - raise 'You have to install Rust with Cargo (https://www.rust-lang.org/)' -end - -require 'rake' diff --git a/ext/rscsv/Cargo.lock b/ext/rscsv/Cargo.lock new file mode 100644 index 0000000..a5fb74d --- /dev/null +++ b/ext/rscsv/Cargo.lock @@ -0,0 +1,343 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.179" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "magnus" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab" +dependencies = [ + "magnus-macros", + "rb-sys", + "rb-sys-env", + "seq-macro", +] + +[[package]] +name = "magnus-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "proc-macro2" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rb-sys" +version = "0.9.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45fb1a185af97ee456f1c9e56dbe6e2e662bec4fdeaf83c4c28e0e6adfb18816" +dependencies = [ + "rb-sys-build", +] + +[[package]] +name = "rb-sys-build" +version = "0.9.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a58ebd02d7a6033e6a5f6f8d150c1e9f16506039092b84a73e6bedce6d3adf41" +dependencies = [ + "bindgen", + "lazy_static", + "proc-macro2", + "quote", + "regex", + "shell-words", + "syn", +] + +[[package]] +name = "rb-sys-env" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb" + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rscsv" +version = "0.5.0" +dependencies = [ + "csv", + "magnus", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shell-words" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" diff --git a/Cargo.toml b/ext/rscsv/Cargo.toml similarity index 63% rename from Cargo.toml rename to ext/rscsv/Cargo.toml index 937798e..3d19ab2 100644 --- a/Cargo.toml +++ b/ext/rscsv/Cargo.toml @@ -2,11 +2,12 @@ name = "rscsv" version = "0.5.0" authors = ["Ville Lautanala "] +edition = "2021" [lib] - crate-type = ["cdylib"] [dependencies] -helix = "0.7.3" -csv = "^1" +magnus = { version = "0.7", features = ["rb-sys"] } +csv = "1" + diff --git a/ext/rscsv/extconf.rb b/ext/rscsv/extconf.rb new file mode 100644 index 0000000..261b9b7 --- /dev/null +++ b/ext/rscsv/extconf.rb @@ -0,0 +1,5 @@ +require "mkmf" +require "rb_sys/mkmf" + +create_rust_makefile("rscsv/rscsv") + diff --git a/ext/rscsv/src/lib.rs b/ext/rscsv/src/lib.rs new file mode 100644 index 0000000..6d3a101 --- /dev/null +++ b/ext/rscsv/src/lib.rs @@ -0,0 +1,146 @@ +use magnus::{ + block::yield_value, function, prelude::*, Error, RArray, RString, Ruby, Value, +}; +use std::io::Read; + +fn generate_lines(rows: Vec>) -> Result { + let mut wtr = csv::WriterBuilder::new().from_writer(vec![]); + for row in rows { + wtr.write_record(&row) + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?; + } + + let inner = wtr + .into_inner() + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?; + + String::from_utf8(inner) + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string())) +} + +fn record_to_ruby_array(record: &csv::ByteRecord) -> Result { + let array = RArray::with_capacity(record.len()); + for column in record.iter() { + let column_str = RString::from_slice(column); + array.push(column_str)?; + } + Ok(array) +} + +struct EnumeratorRead { + enumerator: Value, + buffer: Option>, +} + +impl EnumeratorRead { + fn new(enumerator: Value) -> Self { + EnumeratorRead { + enumerator, + buffer: None, + } + } + + fn read_and_store_overflow(&mut self, buf: &mut [u8], value: &[u8]) -> std::io::Result { + if value.len() > buf.len() { + let (current, next) = value.split_at(buf.len()); + buf.copy_from_slice(current); + self.buffer = Some(next.to_vec()); + Ok(current.len()) + } else { + buf[..value.len()].copy_from_slice(value); + self.buffer = None; + Ok(value.len()) + } + } + + fn read_from_external(&mut self, buf: &mut [u8]) -> std::io::Result { + let result: Result = self.enumerator.funcall("next", ()); + match result { + Ok(string) => self.read_and_store_overflow(buf, string.as_bytes()), + Err(_) => { + // StopIteration or other exception - signal EOF + Ok(0) + } + } + } +} + +impl Read for EnumeratorRead { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + match self.buffer.take() { + Some(ref inner) => self.read_and_store_overflow(buf, inner), + None => self.read_from_external(buf), + } + } +} + +fn csv_reader(reader: R) -> csv::Reader { + csv::ReaderBuilder::new() + .buffer_capacity(16 * 1024) + .has_headers(false) + .from_reader(reader) +} + +fn yield_csv(enumerator: Value) -> Result<(), Error> { + let mut reader = csv_reader(EnumeratorRead::new(enumerator)); + let mut record = csv::ByteRecord::new(); + + loop { + let has_record = reader + .read_byte_record(&mut record) + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?; + + if !has_record { + break; + } + + let row_array = record_to_ruby_array(&record)?; + let _: Value = yield_value(row_array)?; + } + + Ok(()) +} + +fn parse_csv(data: String) -> Result { + let mut reader = csv_reader(data.as_bytes()); + let result = RArray::new(); + + for record in reader.records() { + let record = record + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?; + + let row = RArray::with_capacity(record.len()); + for field in record.iter() { + row.push(RString::new(field))?; + } + result.push(row)?; + } + + Ok(result) +} + +fn generate_line(row: Vec) -> Result { + let mut wtr = csv::WriterBuilder::new().from_writer(vec![]); + wtr.write_record(&row) + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?; + + let inner = wtr + .into_inner() + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?; + + String::from_utf8(inner) + .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string())) +} + +#[magnus::init] +fn init(ruby: &Ruby) -> Result<(), Error> { + let reader_class = ruby.define_class("RscsvReader", ruby.class_object())?; + reader_class.define_singleton_method("each_internal", function!(yield_csv, 1))?; + reader_class.define_singleton_method("parse", function!(parse_csv, 1))?; + + let writer_class = ruby.define_class("RscsvWriter", ruby.class_object())?; + writer_class.define_singleton_method("generate_line", function!(generate_line, 1))?; + writer_class.define_singleton_method("generate_lines", function!(generate_lines, 1))?; + + Ok(()) +} diff --git a/lib/rscsv.rb b/lib/rscsv.rb index b23495a..f3b64b7 100644 --- a/lib/rscsv.rb +++ b/lib/rscsv.rb @@ -1,6 +1,5 @@ -require 'helix_runtime' -require 'rscsv/native' -require 'rscsv/version' +require_relative 'rscsv/rscsv' +require_relative 'rscsv/version' module Rscsv Reader = RscsvReader @@ -12,5 +11,6 @@ def self.each(input, &block) nil end end + Writer = RscsvWriter end diff --git a/lib/tasks/helix_runtime.rake b/lib/tasks/helix_runtime.rake deleted file mode 100644 index 970d5ef..0000000 --- a/lib/tasks/helix_runtime.rake +++ /dev/null @@ -1,5 +0,0 @@ -require 'helix_runtime/build_task' - -HelixRuntime::BuildTask.new - -task :default => :build diff --git a/rscsv.gemspec b/rscsv.gemspec index 5d7a16c..b85cd16 100644 --- a/rscsv.gemspec +++ b/rscsv.gemspec @@ -2,10 +2,6 @@ lib = File.expand_path('../lib', __FILE__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'rscsv/version' -ruby_sources = Dir['{lib/**/*,[A-Z]*}'] - Dir['Cargo.*', 'Gemfile.lock'] -rust_sources = Dir['{src/**/*,ext/**/*,Cargo.*}'] -native_bundle = Dir['lib/rscsv/native.bundle', 'lib/rscsv/native.so'] - Gem::Specification.new do |spec| spec.name = 'rscsv' spec.version = Rscsv::VERSION @@ -17,21 +13,26 @@ Gem::Specification.new do |spec| spec.homepage = 'https://github.com/lautis/rscsv' spec.license = 'MIT' - if ENV['NATIVE_BUNDLE'] - spec.platform = Gem::Platform.local - spec.files = ruby_sources - else - spec.files = ruby_sources + rust_sources - native_bundle - spec.extensions = Dir['ext/extconf.rb'] - end - - spec.bindir = 'exe' - spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.files = Dir[ + 'lib/**/*.rb', + 'ext/**/*.{rs,rb,toml}', + 'Cargo.toml', + 'Cargo.lock', + 'LICENSE.txt', + 'README.md' + ] + spec.extensions = ['ext/rscsv/extconf.rb'] spec.require_paths = ['lib'] - spec.add_dependency 'helix_runtime', '0.7.5' - spec.add_development_dependency 'bundler', '>= 1.14' - spec.add_development_dependency 'rake', '>= 10.0' + spec.required_ruby_version = '>= 3.0' + + spec.add_dependency 'rb_sys', '~> 0.9' + + spec.add_development_dependency 'bundler', '>= 2.0' + spec.add_development_dependency 'rake', '>= 13.0' + spec.add_development_dependency 'rake-compiler', '~> 1.2' + spec.add_development_dependency 'rake-compiler-dock', '~> 1.5' spec.add_development_dependency 'rspec', '~> 3.0' + spec.add_development_dependency 'csv', '>= 3.0' spec.add_development_dependency 'benchmark-ips', '~> 2.7' end diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index f3723a2..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,206 +0,0 @@ -#[macro_use] -extern crate helix; -extern crate csv; - -use std::error::Error; -use std::io::Read; -use helix::sys; -use helix::sys::{VALUE, RubyException}; -use helix::{FromRuby, CheckResult, ToRuby}; -use helix::libc::{c_void}; - -fn generate_lines(rows: &[Vec]) -> Result> { - let mut wtr = csv::WriterBuilder::new().from_writer(vec![]); - for row in rows { - wtr.write_record(row)?; - } - - Ok(String::from_utf8(wtr.into_inner()?)?) -} - -fn record_to_ruby(record: &csv::ByteRecord) -> VALUE { - let inner_array = unsafe { sys::rb_ary_new_capa(record.len() as isize) }; - for column in record.iter() { - unsafe { - let column_value = - sys::rb_utf8_str_new(column.as_ptr() as *const i8, column.len() as i64); - sys::rb_ary_push(inner_array, column_value); - } - } - inner_array -} - -extern fn protect_wrapper(closure: *mut c_void) -> VALUE - where F: FnOnce() -> VALUE { - let closure_option = closure as *mut Option; - unsafe { - (*closure_option).take().unwrap()() - } - } - -pub fn protect(func: F) -> Result -where - F: FnOnce() -> VALUE, -{ - let mut state = sys::EMPTY_EXCEPTION; - let value = unsafe { - sys::rb_protect( - protect_wrapper::, - &func as *const _ as *mut c_void, - &mut state, - ) - }; - if state == sys::EMPTY_EXCEPTION { - Ok(value) - } else { - Err(state) - } -} - -struct Enumerator { - value: VALUE, -} - -impl FromRuby for Enumerator { - type Checked = Enumerator; - - fn from_ruby(value: VALUE) -> CheckResult { - // TODO: validate this? - Ok(Enumerator { value }) - } - - fn from_checked(checked: Enumerator) -> Enumerator { - checked - } -} - -struct EnumeratorRead { - value: VALUE, - next: Option>, -} - -impl EnumeratorRead { - fn new(value: VALUE) -> EnumeratorRead { - EnumeratorRead { - value, - next: None, - } - } - - fn read_and_store_overflow(&mut self, buf: &mut [u8], value: &[u8]) -> std::io::Result { - if value.len() > buf.len() { - match value.split_at(buf.len()) { - (current, next) => { - for (index, c) in current.iter().enumerate() { - buf[index] = *c; - } - self.next = Some(next.to_vec()); - Ok(current.len()) - } - } - - } else { - for (index, value) in value.iter().enumerate() { - buf[index] = *value; - } - self.next = None; - Ok(value.len() as usize) - } - } - - fn read_from_external(&mut self, buf: &mut [u8]) -> std::io::Result { - - let value = self.value; - let result = protect(|| { - unsafe { sys::rb_funcall( - value, - sys::rb_intern("next\0".as_ptr() as *const i8), - 0) - } - }); - match result { - Ok(next) => { - let string = String::from_ruby_unwrap(next); - self.read_and_store_overflow(buf, string.as_bytes()) - }, - Err(state) => { - unsafe { sys::rb_jump_tag(state) }; - //Err(std::io::Error::new(ErrorKind::Other, "Ruby Exception")) - } - } - - } -} - -impl Read for EnumeratorRead { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - match self.next.take() { - Some(ref inner) => self.read_and_store_overflow(buf, inner), - None => self.read_from_external(buf), - } - } -} - -fn csv_reader(reader: R) -> csv::Reader { - csv::ReaderBuilder::new() - .buffer_capacity(16 * 1024) - .has_headers(false) - .from_reader(reader) -} - -fn yield_csv(data: &Enumerator) -> Result<(), csv::Error> { - let mut reader = csv_reader(EnumeratorRead::new(data.value)); - let mut record = csv::ByteRecord::new(); - - while reader.read_byte_record(&mut record)? { - let inner_array = record_to_ruby(&record); - let result = protect(|| { - unsafe { - return sys::rb_yield(inner_array); - } - }); - - if result.is_err() { - unsafe { sys::rb_jump_tag(result.unwrap_err()) }; - } - } - - Ok(()) -} - -fn parse_csv(data: &str) -> Result>, csv::Error> { - csv_reader(data.as_bytes()) - .records() - .map(|r| r.map(|v| record_to_vec(&v))) - .collect() -} - -fn record_to_vec(record: &csv::StringRecord) -> Vec { - record.iter().map(|s| s.to_ruby().unwrap()).collect() -} - -ruby! { - class RscsvReader { - def each_internal(data: Enumerator) -> Result<(), &'static str> { - yield_csv(&data).map_err(|_| "Error parsing CSV") - } - - def parse(data: String) -> Result>, &'static str> { - parse_csv(&data).map_err(|_| "Error parsing CSV") - } - } - - class RscsvWriter { - def generate_line(row: Vec) -> Result { - let mut wtr = csv::WriterBuilder::new().from_writer(vec![]); - - wtr.write_record(&row) - .map(|_| String::from_utf8(wtr.into_inner().unwrap()).unwrap()) - .map_err(|_| "Error generating csv") - } - - def generate_lines(rows: Vec>) -> Result { - generate_lines(&rows).map_err(|_| "Error generating csv") - } - } -} From 5ee5fdf2ad626d0bfb7be3990dd15316de42c256 Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 21:25:32 +0200 Subject: [PATCH 2/7] Use GitHub actions for CI --- .github/workflows/build.yml | 104 ++++++++++++++++++++++++++++++++++ .github/workflows/ci.yml | 13 +++++ .github/workflows/release.yml | 47 +++++++++++++++ .travis.yml | 45 --------------- README.md | 2 - 5 files changed, 164 insertions(+), 47 deletions(-) create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..f2e7ad8 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,104 @@ +name: Build Gems + +on: + workflow_call: + inputs: + upload-artifacts: + description: "Whether to upload gem artifacts" + type: boolean + default: true + required: false + +jobs: + # Test on multiple Ruby versions and platforms + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + ruby: ["3.0", "3.1", "3.2", "3.3"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + + - name: Build extension + run: bundle exec rake compile + + - name: Run tests + run: bundle exec rake spec + + # Build native gems for different platforms using rake-compiler-dock + cross-gem: + name: Build native gem for ${{ matrix.platform }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - x86_64-linux + - x86_64-linux-musl + - aarch64-linux + - aarch64-linux-musl + - x86_64-darwin + - arm64-darwin + - x64-mingw-ucrt + - x64-mingw32 + steps: + - uses: actions/checkout@v4 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.3" + bundler-cache: true + + - name: Install dependencies + run: bundle install + + - name: Build native gem for ${{ matrix.platform }} + run: | + bundle exec rake native:${{ matrix.platform }}:gem + env: + RUBY_CC_VERSION: "3.0.0:3.1.0:3.2.0:3.3.0" + + - name: Upload gem artifact + if: inputs.upload-artifacts + uses: actions/upload-artifact@v4 + with: + name: native-gem-${{ matrix.platform }} + path: pkg/*-${{ matrix.platform }}.gem + retention-days: 1 + + # Build the source gem (no pre-built binary) + source-gem: + name: Build source gem + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.3" + bundler-cache: true + + - name: Build source gem + run: gem build rscsv.gemspec + + - name: Upload gem artifact + if: inputs.upload-artifacts + uses: actions/upload-artifact@v4 + with: + name: source-gem + path: "rscsv-*.gem" + retention-days: 1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d84f9be --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,13 @@ +name: CI + +on: + push: + branches: [master, main] + pull_request: + branches: [master, main] + +jobs: + build: + uses: ./.github/workflows/build.yml + with: + upload-artifacts: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..5c95109 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,47 @@ +name: Release + +on: + push: + tags: + - "v*" + +jobs: + # Build all gems using the reusable workflow + build: + uses: ./.github/workflows/build.yml + with: + upload-artifacts: true + + # Publish all gems to RubyGems + publish: + name: Publish gems to RubyGems + needs: build + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + environment: release + steps: + - uses: actions/checkout@v4 + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.3" + + - name: Download all gem artifacts + uses: actions/download-artifact@v4 + with: + path: pkg + pattern: "*-gem*" + merge-multiple: true + + - name: List gems to publish + run: ls -la pkg/ + + - name: Publish gems to RubyGems (with trusted publishing) + run: | + for gem in pkg/*.gem; do + echo "Publishing $gem" + gem push "$gem" + done diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d8fbc14..0000000 --- a/.travis.yml +++ /dev/null @@ -1,45 +0,0 @@ -sudo: false -language: ruby -rvm: - - 2.4.5 - - 2.5.5 - - 2.6.2 -before_install: - - if [ ! -e "$HOME/.cargo/bin" ]; then curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y; fi - - export PATH="$HOME/.cargo/bin:$PATH" - - rustup default stable - - rustc --version -install: - - bundle install --path vendor/bundle - - bundle exec rake helix:copy_dll build -after_success: - - ruby ./bin/ci-publish $TRAVIS_TAG -matrix: - include: - - os: linux - rvm: 2.3.8 - env: - - GEM_PUBLISH=true - - os: linux - rvm: 2.3.8 - env: - - NATIVE_BUNDLE=true - - GEM_PUBLISH=true - - os: osx - osx_image: xcode9.2 - rvm: 2.3.8 - env: - - NATIVE_BUNDLE=true - - GEM_PUBLISH=true - - os: osx - osx_image: xcode10.1 - rvm: 2.3.8 - env: - - NATIVE_BUNDLE=true - - GEM_PUBLISH=true - - os: osx - osx_image: xcode10.2 - rvm: 2.3.8 - env: - - NATIVE_BUNDLE=true - - GEM_PUBLISH=true diff --git a/README.md b/README.md index cd82381..6689266 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,6 @@ Fast CSV using Rust extensions. Can read arrays of arrays from strings and write strings from arrays of arrays. -[![Build Status](https://travis-ci.org/lautis/rscsv.svg?branch=master)](https://travis-ci.org/lautis/rscsv) - ## Installation This gem requires Rust (~> 1.17) and Cargo to be installed. With those From 24aa421527eed27e5a769c1a50da4e194b35e183 Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 21:31:30 +0200 Subject: [PATCH 3/7] Use oxidize-rb actions for CI --- .github/workflows/build.yml | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f2e7ad8..dbf454c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,14 +22,12 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Ruby - uses: ruby/setup-ruby@v1 + - name: Set up Ruby and Rust + uses: oxidize-rb/actions/setup-ruby-and-rust@v1 with: ruby-version: ${{ matrix.ruby }} bundler-cache: true - - - name: Set up Rust - uses: dtolnay/rust-toolchain@stable + cargo-cache: true - name: Build extension run: bundle exec rake compile @@ -37,7 +35,7 @@ jobs: - name: Run tests run: bundle exec rake spec - # Build native gems for different platforms using rake-compiler-dock + # Build native gems for different platforms using oxidize-rb/cross-gem cross-gem: name: Build native gem for ${{ matrix.platform }} runs-on: ubuntu-latest @@ -56,20 +54,18 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Ruby - uses: ruby/setup-ruby@v1 + - name: Set up Ruby and Rust + uses: oxidize-rb/actions/setup-ruby-and-rust@v1 with: ruby-version: "3.3" bundler-cache: true + cargo-cache: true - - name: Install dependencies - run: bundle install - - - name: Build native gem for ${{ matrix.platform }} - run: | - bundle exec rake native:${{ matrix.platform }}:gem - env: - RUBY_CC_VERSION: "3.0.0:3.1.0:3.2.0:3.3.0" + - name: Build native gem + uses: oxidize-rb/actions/cross-gem@v1 + with: + platform: ${{ matrix.platform }} + ruby-versions: "3.0, 3.1, 3.2, 3.3" - name: Upload gem artifact if: inputs.upload-artifacts From 896e1308290011f6bb9511674426eb9946c6c24d Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 21:39:30 +0200 Subject: [PATCH 4/7] Add ruby 3.4 to build matrix --- .github/workflows/build.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dbf454c..1ea94e5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,6 +8,11 @@ on: type: boolean default: true required: false + ruby-versions: + description: "Ruby versions to test and build for (JSON array)" + type: string + default: '["3.0", "3.1", "3.2", "3.3", "3.4"]' + required: false jobs: # Test on multiple Ruby versions and platforms @@ -17,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest] - ruby: ["3.0", "3.1", "3.2", "3.3"] + ruby: ${{ fromJSON(inputs.ruby-versions) }} steps: - uses: actions/checkout@v4 @@ -65,7 +70,7 @@ jobs: uses: oxidize-rb/actions/cross-gem@v1 with: platform: ${{ matrix.platform }} - ruby-versions: "3.0, 3.1, 3.2, 3.3" + ruby-versions: ${{ join(fromJSON(inputs.ruby-versions), ', ') }} - name: Upload gem artifact if: inputs.upload-artifacts From 209e6754be95b19084543aeabbaf0eb53a807810 Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 22:04:10 +0200 Subject: [PATCH 5/7] Update benchmark --- .github/workflows/benchmark.yml | 111 ++++++++++++++++++++++++++++++++ Gemfile | 3 + bin/benchmark | 16 +++++ 3 files changed, 130 insertions(+) create mode 100644 .github/workflows/benchmark.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..7d4dec7 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,111 @@ +name: Benchmark + +on: + push: + branches: [master, main] + pull_request: + branches: [master, main] + +jobs: + benchmark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Ruby and Rust + uses: oxidize-rb/actions/setup-ruby-and-rust@v1 + with: + ruby-version: "3.4" + bundler-cache: true + cargo-cache: true + + - name: Build extension + run: bundle exec rake compile + + - name: Run benchmarks with YJIT + run: | + bundle exec ruby --yjit bin/benchmark | tee benchmark_output.txt + + - name: Parse and format results + run: | + cat > parse_benchmark.rb << 'EOF' + #!/usr/bin/env ruby + + input = File.read('benchmark_output.txt') + + # Split into writing and reading sections + sections = input.split(/^===/) + + def parse_section(section, title) + lines = section.lines + results = [] + + lines.each do |line| + # Match lines like: "rscsv 123.456k (± 5.0%) i/s - 100.000k in 0.500000s" + if line =~ /^(\w[\w\s]+?)\s+(\d+\.?\d*[kM]?)\s+\([^)]+\)\s+i\/s/ + name = $1.strip + ips = $2 + results << { name: name, ips: ips } + end + end + + return nil if results.empty? + + # Build markdown table + table = "### #{title}\n\n" + table += "| Library | Iterations/second |\n" + table += "|---------|------------------|\n" + + results.each do |result| + table += "| #{result[:name]} | #{result[:ips]} |\n" + end + + # Add comparison info + comparison_start = lines.index { |l| l =~ /Comparison:/ } + if comparison_start + table += "\n**Comparison:**\n\n" + lines[(comparison_start + 1)..-1].each do |line| + line = line.strip + next if line.empty? + # Format comparison lines + if line =~ /(.+?):\s+(.+)/ + table += "- #{$1.strip}: #{$2.strip}\n" + end + end + end + + table + end + + output = "## Benchmark Results\n\n" + output += "*Benchmarks run with YJIT enabled on Ruby #{RUBY_VERSION}*\n\n" + + sections.each do |section| + if section =~ /CSV Writing/ + result = parse_section(section, "CSV Writing Performance") + output += result + "\n\n" if result + elsif section =~ /CSV Reading/ + result = parse_section(section, "CSV Reading Performance") + output += result + "\n\n" if result + end + end + + # Write to GitHub Step Summary + if ENV['GITHUB_STEP_SUMMARY'] + File.write(ENV['GITHUB_STEP_SUMMARY'], output) + puts "Results written to GitHub Actions summary" + end + + # Also print to stdout + puts output + EOF + + ruby parse_benchmark.rb + + - name: Upload benchmark output + uses: actions/upload-artifact@v4 + if: always() + with: + name: benchmark-results + path: benchmark_output.txt + retention-days: 30 diff --git a/Gemfile b/Gemfile index fa75df1..7b10390 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,6 @@ source 'https://rubygems.org' gemspec + +# Benchmarking dependencies +gem 'osv', '~> 0.4', require: false if RUBY_VERSION >= '3.2' diff --git a/bin/benchmark b/bin/benchmark index c425be8..c160fbd 100755 --- a/bin/benchmark +++ b/bin/benchmark @@ -1,11 +1,19 @@ #!/usr/bin/env ruby +# Enable YJIT if available (Ruby 3.1+) +if defined?(RubyVM::YJIT) && RubyVM::YJIT.respond_to?(:enable) + RubyVM::YJIT.enable + puts "YJIT enabled" if ENV['DEBUG'] +end + $LOAD_PATH << File.dirname(__FILE__) + '/../lib' require 'benchmark/ips' require 'securerandom' require 'rscsv' require 'csv' +require 'osv' +require 'stringio' rows = (0...1000).map do (0...10).map { SecureRandom.hex } @@ -15,6 +23,7 @@ csv_string = CSV.generate do |csv| rows.each { |row| csv << row } end +puts "\n=== CSV Writing Benchmark ===" Benchmark.ips do |x| x.report('Ruby CSV') do |times| times.times do @@ -31,6 +40,7 @@ Benchmark.ips do |x| x.compare! end +puts "\n=== CSV Reading Benchmark ===" Benchmark.ips do |x| x.report('Ruby CSV') do |times| times.times do @@ -42,5 +52,11 @@ Benchmark.ips do |x| times.times { Rscsv::Reader.parse(csv_string) } end + x.report('osv') do |times| + times.times do + OSV.for_each(StringIO.new(csv_string), result_type: :array) { |row| row } + end + end + x.compare! end From 715bbc69a2c36240187b53ecb1f7b3dabc96ae3c Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 22:15:01 +0200 Subject: [PATCH 6/7] Use job summary for benchmark output --- .github/workflows/benchmark.yml | 67 +++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 7d4dec7..67811f6 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -33,19 +33,31 @@ jobs: input = File.read('benchmark_output.txt') - # Split into writing and reading sections - sections = input.split(/^===/) - - def parse_section(section, title) - lines = section.lines + def parse_section(text, title) + lines = text.lines.map(&:chomp) results = [] + comparisons = [] + in_comparison = false + in_calculating = false lines.each do |line| - # Match lines like: "rscsv 123.456k (± 5.0%) i/s - 100.000k in 0.500000s" - if line =~ /^(\w[\w\s]+?)\s+(\d+\.?\d*[kM]?)\s+\([^)]+\)\s+i\/s/ + # Start of calculation section + if line =~ /^Calculating/ + in_calculating = true + next + end + + # Match result lines after "Calculating" section + # Format: " Ruby CSV 463.779 (± 2.4%) i/s (2.16 ms/i) - ..." + if in_calculating && line =~ /^\s*(.+?)\s\s+(\d+\.?\d*)\s+\(.+?\)\s+i\/s/ name = $1.strip ips = $2 results << { name: name, ips: ips } + elsif line =~ /^Comparison:/ + in_comparison = true + in_calculating = false + elsif in_comparison && line.strip =~ /^(.+?):\s+(.+)/ + comparisons << "- **#{$1.strip}**: #{$2.strip}" end end @@ -53,51 +65,48 @@ jobs: # Build markdown table table = "### #{title}\n\n" - table += "| Library | Iterations/second |\n" - table += "|---------|------------------|\n" + table += "| Library | Iterations/sec |\n" + table += "|---------|---------------|\n" results.each do |result| table += "| #{result[:name]} | #{result[:ips]} |\n" end # Add comparison info - comparison_start = lines.index { |l| l =~ /Comparison:/ } - if comparison_start + if !comparisons.empty? table += "\n**Comparison:**\n\n" - lines[(comparison_start + 1)..-1].each do |line| - line = line.strip - next if line.empty? - # Format comparison lines - if line =~ /(.+?):\s+(.+)/ - table += "- #{$1.strip}: #{$2.strip}\n" - end - end + table += comparisons.join("\n") + "\n" end table end - output = "## Benchmark Results\n\n" - output += "*Benchmarks run with YJIT enabled on Ruby #{RUBY_VERSION}*\n\n" + output = "## 🚀 Benchmark Results\n\n" + output += "_Benchmarks run with YJIT enabled on Ruby #{RUBY_VERSION}_\n\n" + + # Split by === markers to get sections + sections = input.split(/^=== /) sections.each do |section| - if section =~ /CSV Writing/ - result = parse_section(section, "CSV Writing Performance") - output += result + "\n\n" if result - elsif section =~ /CSV Reading/ - result = parse_section(section, "CSV Reading Performance") - output += result + "\n\n" if result + if section.include?("CSV Writing Benchmark") + result = parse_section(section, "📝 CSV Writing Performance") + output += result + "\n" if result + elsif section.include?("CSV Reading Benchmark") + result = parse_section(section, "📖 CSV Reading Performance") + output += result + "\n" if result end end # Write to GitHub Step Summary if ENV['GITHUB_STEP_SUMMARY'] File.write(ENV['GITHUB_STEP_SUMMARY'], output) - puts "Results written to GitHub Actions summary" + puts "✅ Results written to GitHub Actions summary" + else + puts "⚠️ GITHUB_STEP_SUMMARY not available, printing to stdout:" end # Also print to stdout - puts output + puts "\n" + output EOF ruby parse_benchmark.rb From def04742618108136af8760300c792b866841307 Mon Sep 17 00:00:00 2001 From: Ville Lautanala Date: Sat, 3 Jan 2026 22:47:17 +0200 Subject: [PATCH 7/7] Use JSON output for bin/benchmark for CI --- .github/workflows/benchmark.yml | 124 ++++++++++++++++---------------- bin/benchmark | 44 ++++++++++-- 2 files changed, 98 insertions(+), 70 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 67811f6..fb2f12e 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -24,97 +24,93 @@ jobs: - name: Run benchmarks with YJIT run: | - bundle exec ruby --yjit bin/benchmark | tee benchmark_output.txt + bundle exec ruby --yjit bin/benchmark --json > benchmark_results.json - - name: Parse and format results + - name: Format results as markdown run: | - cat > parse_benchmark.rb << 'EOF' + cat > format_results.rb << 'EOF' #!/usr/bin/env ruby + require 'json' - input = File.read('benchmark_output.txt') + data = JSON.parse(File.read('benchmark_results.json')) - def parse_section(text, title) - lines = text.lines.map(&:chomp) - results = [] - comparisons = [] - in_comparison = false - in_calculating = false - - lines.each do |line| - # Start of calculation section - if line =~ /^Calculating/ - in_calculating = true - next - end - - # Match result lines after "Calculating" section - # Format: " Ruby CSV 463.779 (± 2.4%) i/s (2.16 ms/i) - ..." - if in_calculating && line =~ /^\s*(.+?)\s\s+(\d+\.?\d*)\s+\(.+?\)\s+i\/s/ - name = $1.strip - ips = $2 - results << { name: name, ips: ips } - elsif line =~ /^Comparison:/ - in_comparison = true - in_calculating = false - elsif in_comparison && line.strip =~ /^(.+?):\s+(.+)/ - comparisons << "- **#{$1.strip}**: #{$2.strip}" - end - end - - return nil if results.empty? + output = "## 🚀 Benchmark Results\n\n" + output += "_Benchmarks run with YJIT enabled on Ruby #{data['ruby_version']}_\n\n" + + # Writing benchmark + if data['benchmarks']['writing'] + output += "### 📝 CSV Writing Performance\n\n" + output += "| Library | Iterations/sec | Std Dev |\n" + output += "|---------|----------------|----------|\n" - # Build markdown table - table = "### #{title}\n\n" - table += "| Library | Iterations/sec |\n" - table += "|---------|---------------|\n" + writing = data['benchmarks']['writing'] + sorted_writing = writing.sort_by { |_, v| -v['ips'] } - results.each do |result| - table += "| #{result[:name]} | #{result[:ips]} |\n" + sorted_writing.each do |name, stats| + output += "| #{name} | #{stats['ips'].round(1)} | ±#{stats['stddev_percentage'].round(1)}% |\n" end - # Add comparison info - if !comparisons.empty? - table += "\n**Comparison:**\n\n" - table += comparisons.join("\n") + "\n" + # Add comparison + if sorted_writing.length > 1 + fastest = sorted_writing.first + output += "\n**Comparison:**\n\n" + sorted_writing.each do |name, stats| + if name == fastest[0] + output += "- **#{name}**: #{stats['ips'].round(1)} i/s (fastest)\n" + else + slowdown = fastest[1]['ips'] / stats['ips'] + output += "- **#{name}**: #{stats['ips'].round(1)} i/s - #{slowdown.round(2)}x slower\n" + end + end end - - table + output += "\n" end - output = "## 🚀 Benchmark Results\n\n" - output += "_Benchmarks run with YJIT enabled on Ruby #{RUBY_VERSION}_\n\n" - - # Split by === markers to get sections - sections = input.split(/^=== /) - - sections.each do |section| - if section.include?("CSV Writing Benchmark") - result = parse_section(section, "📝 CSV Writing Performance") - output += result + "\n" if result - elsif section.include?("CSV Reading Benchmark") - result = parse_section(section, "📖 CSV Reading Performance") - output += result + "\n" if result + # Reading benchmark + if data['benchmarks']['reading'] + output += "### 📖 CSV Reading Performance\n\n" + output += "| Library | Iterations/sec | Std Dev |\n" + output += "|---------|----------------|----------|\n" + + reading = data['benchmarks']['reading'] + sorted_reading = reading.sort_by { |_, v| -v['ips'] } + + sorted_reading.each do |name, stats| + output += "| #{name} | #{stats['ips'].round(1)} | ±#{stats['stddev_percentage'].round(1)}% |\n" + end + + # Add comparison + if sorted_reading.length > 1 + fastest = sorted_reading.first + output += "\n**Comparison:**\n\n" + sorted_reading.each do |name, stats| + if name == fastest[0] + output += "- **#{name}**: #{stats['ips'].round(1)} i/s (fastest)\n" + else + slowdown = fastest[1]['ips'] / stats['ips'] + output += "- **#{name}**: #{stats['ips'].round(1)} i/s - #{slowdown.round(2)}x slower\n" + end + end end + output += "\n" end # Write to GitHub Step Summary if ENV['GITHUB_STEP_SUMMARY'] File.write(ENV['GITHUB_STEP_SUMMARY'], output) puts "✅ Results written to GitHub Actions summary" - else - puts "⚠️ GITHUB_STEP_SUMMARY not available, printing to stdout:" end # Also print to stdout - puts "\n" + output + puts output EOF - ruby parse_benchmark.rb + ruby format_results.rb - - name: Upload benchmark output + - name: Upload benchmark results uses: actions/upload-artifact@v4 if: always() with: name: benchmark-results - path: benchmark_output.txt + path: benchmark_results.json retention-days: 30 diff --git a/bin/benchmark b/bin/benchmark index c160fbd..26d69a5 100755 --- a/bin/benchmark +++ b/bin/benchmark @@ -14,6 +14,11 @@ require 'rscsv' require 'csv' require 'osv' require 'stringio' +require 'json' + +# Parse command line arguments +json_output = ARGV.include?('--json') +results = { ruby_version: RUBY_VERSION, yjit_enabled: defined?(RubyVM::YJIT), benchmarks: {} } rows = (0...1000).map do (0...10).map { SecureRandom.hex } @@ -23,8 +28,8 @@ csv_string = CSV.generate do |csv| rows.each { |row| csv << row } end -puts "\n=== CSV Writing Benchmark ===" -Benchmark.ips do |x| +puts "\n=== CSV Writing Benchmark ===" unless json_output +write_report = Benchmark.ips(quiet: json_output) do |x| x.report('Ruby CSV') do |times| times.times do CSV.generate do |csv| @@ -37,11 +42,22 @@ Benchmark.ips do |x| times.times { Rscsv::Writer.generate_lines(rows) } end - x.compare! + x.compare! unless json_output end -puts "\n=== CSV Reading Benchmark ===" -Benchmark.ips do |x| +if json_output + write_results = {} + write_report.entries.each do |entry| + write_results[entry.label] = { + ips: entry.stats.central_tendency, + stddev_percentage: entry.stats.error_percentage + } + end + results[:benchmarks][:writing] = write_results +end + +puts "\n=== CSV Reading Benchmark ===" unless json_output +read_report = Benchmark.ips(quiet: json_output) do |x| x.report('Ruby CSV') do |times| times.times do CSV.parse(csv_string) @@ -58,5 +74,21 @@ Benchmark.ips do |x| end end - x.compare! + x.compare! unless json_output +end + +if json_output + read_results = {} + read_report.entries.each do |entry| + read_results[entry.label] = { + ips: entry.stats.central_tendency, + stddev_percentage: entry.stats.error_percentage + } + end + results[:benchmarks][:reading] = read_results +end + +# Output JSON if requested +if json_output + puts JSON.pretty_generate(results) end