diff --git a/.gitignore b/.gitignore index ee3e0af630..a89c885552 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,5 @@ rustworkx-core/Cargo.lock **/.DS_Store venv/ .python-version +.venv-graph6/ +graph6-doc/ diff --git a/releasenotes/notes/graph6-style-graph-digraph-1eb41a0729b8cddd.yaml b/releasenotes/notes/graph6-style-graph-digraph-1eb41a0729b8cddd.yaml new file mode 100644 index 0000000000..8930bdccd3 --- /dev/null +++ b/releasenotes/notes/graph6-style-graph-digraph-1eb41a0729b8cddd.yaml @@ -0,0 +1,59 @@ +--- +prelude: > + Definition : https://users.cecs.anu.edu.au/~bdm/data/formats.txt + Added native read/write support for the ASCII graph encoding formats + graph6, digraph6, and sparse6. These enable compact text (and optionally + gzip-compressed) serialization of simple graphs and directed graphs with + canonical size-field validation and clearer error reporting. + +features: + - | + Introduced built‑in helpers for three related ASCII graph formats: + + graph6 + - Undirected simple graph encoding using a variable-length size field and + 6-bit packing of the upper triangular adjacency matrix. + + digraph6 + - Directed extension of graph6 (string begins with '&'), encoding the full + n × n adjacency (row‑major) with the same 6‑bit packing. + + sparse6 + - Space‑efficient encoding for very sparse undirected graphs using runs of + variable-length integers instead of a dense upper triangle. + + Unified size codec + - A shared GraphNumberCodec enforces canonical size (N(n)) encoding across + graph6, digraph6, and sparse6, preventing divergence in edge cases. + Exposed indirectly via parse_graph6_size() for testing and tooling. + + Error handling + - Added/used Python exceptions: Graph6ParseError, Graph6OverflowError, + Graph6PanicError (panic guard for sparse6 parser). + - Non‑canonical encodings, invalid characters, and oversize graphs fail + fast with deterministic error types instead of panics. + + Gzip support + - graph_write_graph6 / digraph_write_graph6 transparently gzip output + when the destination filename ends with ".gz". + + Testing & validation + - Round‑trip tests for undirected, directed, and sparse variants. + - Boundary tests for size field forms (short, medium, long) and overflow. + - Sparse6 round‑trip on small disconnected components. + + Developer notes + - Only simple (0/1) adjacency is serialized; parallel edges and weights + are collapsed. + - parse_graph6_size() enforces minimal encoding; use offset=1 for + directed strings that start with '&'. + +issues: + - | + Original feature request / discussion: https://github.com/Qiskit/rustworkx/issues/1496 + Implemented in PR: https://github.com/Qiskit/rustworkx/pull/1500 +upgrade: + - | + Parsing now rejects non‑canonical size encodings and any n >= 2^36 with + explicit typed errors. If prior ad‑hoc tooling accepted such inputs, they + may now raise Graph6ParseError or Graph6OverflowError. \ No newline at end of file diff --git a/rustworkx/__init__.py b/rustworkx/__init__.py index 72507525a8..3e1a25ff16 100644 --- a/rustworkx/__init__.py +++ b/rustworkx/__init__.py @@ -13,6 +13,7 @@ from .rustworkx import * + # flake8: noqa import rustworkx.visit @@ -2311,3 +2312,7 @@ def write_graphml(graph, path, /, keys=None, compression=None): :raises RuntimeError: when an error is encountered while writing the GraphML file. """ raise TypeError(f"Invalid Input Type {type(graph)} for graph") + +@_rustworkx_dispatch +def write_graph6(graph, path): + raise TypeError(f"Invalid Input Type {type(graph)} for graph") \ No newline at end of file diff --git a/src/digraph6.rs b/src/digraph6.rs new file mode 100644 index 0000000000..9dc4a9ca88 --- /dev/null +++ b/src/digraph6.rs @@ -0,0 +1,134 @@ +use crate::graph6::{utils, GraphConversion, IOError}; +use crate::{get_edge_iter_with_weights, StablePyGraph}; +use petgraph::algo; +use petgraph::graph::NodeIndex; +use pyo3::prelude::*; +use pyo3::types::PyAny; + +/// Directed graph implementation (extracted from graph6.rs) +#[derive(Debug)] +pub struct DiGraph6 { + pub bit_vec: Vec, + pub n: usize, +} +impl DiGraph6 { + /// Creates a new DiGraph from a graph6 representation string + pub fn from_d6(repr: &str) -> Result { + let bytes = repr.as_bytes(); + Self::valid_digraph(bytes)?; + let (n, n_len) = utils::parse_size(bytes, 1)?; + let Some(bit_vec) = Self::build_bitvector(bytes, n, 1 + n_len) else { + return Err(IOError::NonCanonicalEncoding); + }; + Ok(Self { bit_vec, n }) + } + + /// Creates a new DiGraph from a flattened adjacency matrix + pub fn from_adj(adj: &[usize]) -> Result { + let n2 = adj.len(); + let n = (n2 as f64).sqrt() as usize; + if n * n != n2 { + return Err(IOError::InvalidAdjacencyMatrix); + } + let bit_vec = adj.to_vec(); + Ok(Self { bit_vec, n }) + } + + /// Validates graph6 directed representation + pub(crate) fn valid_digraph(repr: &[u8]) -> Result { + if repr.is_empty() { + return Err(IOError::InvalidDigraphHeader); + } + if repr[0] == b'&' { + Ok(true) + } else { + Err(IOError::InvalidDigraphHeader) + } + } + + /// Iteratores through the bytes and builds a bitvector + /// representing the adjaceny matrix of the graph + fn build_bitvector(bytes: &[u8], n: usize, offset: usize) -> Option> { + let bv_len = n * n; + utils::fill_bitvector(bytes, bv_len, offset) + } +} + +impl GraphConversion for DiGraph6 { + fn bit_vec(&self) -> &[usize] { + &self.bit_vec + } + + fn size(&self) -> usize { + self.n + } + + fn is_directed(&self) -> bool { + true + } +} + +/// Convert internal DiGraph to PyDiGraph +pub fn digraph6_to_pydigraph<'py>(py: Python<'py>, g: &DiGraph6) -> PyResult> { + use crate::graph6::GraphConversion as _; + let mut graph = StablePyGraph::::with_capacity(g.size(), 0); + for _ in 0..g.size() { + graph.add_node(py.None()); + } + for i in 0..g.size() { + for j in 0..g.size() { + if g.bit_vec[i * g.size() + j] == 1 { + let u = NodeIndex::new(i); + let v = NodeIndex::new(j); + graph.add_edge(u, v, py.None()); + } + } + } + let out = crate::digraph::PyDiGraph { + graph, + cycle_state: algo::DfsSpace::default(), + check_cycle: false, + node_removed: false, + multigraph: true, + attrs: py.None(), + }; + Ok(out.into_pyobject(py)?.into_any()) +} + +#[pyfunction] +#[pyo3(signature=(pydigraph))] +/// Encode a directed `PyDiGraph` into the graph6 digraph extension form. +/// The returned string starts with '&' followed by the size field and data. +/// Multi edges are collapsed; only 0/1 adjacency is represented. Fails if +/// n >= 2^36 or encoding would overflow. +pub fn digraph_write_graph6_to_str<'py>( + py: Python<'py>, + pydigraph: Py, +) -> PyResult { + let g = pydigraph.borrow(py); + let n = g.graph.node_count(); + let mut bit_vec = vec![0usize; n * n]; + for (i, j, _w) in get_edge_iter_with_weights(&g.graph) { + bit_vec[i * n + j] = 1; + } + let graph6 = crate::graph6::write::to_file(bit_vec, n, true)?; + Ok(graph6) +} + +#[pyfunction] +#[pyo3(signature=(digraph, path))] +/// Write a `PyDiGraph` to a file in digraph6 (graph6 with '&' prefix) format. +/// Supports gzip when the filename ends with `.gz`. Overwrites existing file. +/// Returns IOError for filesystem failures. +pub fn digraph_write_graph6( + py: Python<'_>, + digraph: Py, + path: &str, +) -> PyResult<()> { + let s = digraph_write_graph6_to_str(py, digraph)?; + crate::graph6::to_file(path, &s) + .map_err(|e| pyo3::exceptions::PyIOError::new_err(format!("IO error: {}", e)))?; + Ok(()) +} + +impl crate::graph6::write::WriteGraph for DiGraph6 {} diff --git a/src/graph6.rs b/src/graph6.rs new file mode 100644 index 0000000000..67c1f0b347 --- /dev/null +++ b/src/graph6.rs @@ -0,0 +1,633 @@ +//! Combined module: conversion, error, utils, write, undirected and directed +//! This file is intended as a drop-in single-module alternative to +//! the separate files in `src/` so callers can `mod all; use all::...` and +//! avoid many `use super` / `use crate` imports inside the library. + +/// Conversion trait for graphs into various text graph formats +pub trait GraphConversion { + /// Returns the bitvector representation of the graph + fn bit_vec(&self) -> &[usize]; + + /// Returns the number of vertices in the graph + fn size(&self) -> usize; + + /// Returns true if the graph is directed + fn is_directed(&self) -> bool; + + /// Returns the graph in the DOT format + fn to_dot(&self, id: Option) -> String { + let n = self.size(); + let bit_vec = self.bit_vec(); + + let mut dot = String::new(); + + // include graph type + if self.is_directed() { + dot.push_str("digraph "); + } else { + dot.push_str("graph "); + } + + // include graph id + if let Some(id) = id { + dot.push_str(&format!("graph_{} {{", id)); + } else { + dot.push('{'); + } + + // include edges + if self.is_directed() { + self.to_directed_dot(&mut dot, bit_vec, n); + } else { + self.to_undirected_dot(&mut dot, bit_vec, n); + } + + // close graph + dot.push_str("\n}"); + + dot + } + + fn to_undirected_dot(&self, dot: &mut String, bit_vec: &[usize], n: usize) { + for i in 0..n { + for j in i..n { + if bit_vec[i * n + j] == 1 { + dot.push_str(&format!("\n{} -- {};", i, j)); + } + } + } + } + + fn to_directed_dot(&self, dot: &mut String, bit_vec: &[usize], n: usize) { + for i in 0..n { + for j in 0..n { + if bit_vec[i * n + j] == 1 { + dot.push_str(&format!("\n{} -> {};", i, j)); + } + } + } + } + + /// Returns the graph as an adjacency matrix + fn to_adjmat(&self) -> String { + let n = self.size(); + let bit_vec = self.bit_vec(); + + let mut adj = String::new(); + for i in 0..n { + for j in 0..n { + adj.push_str(&format!("{}", bit_vec[i * n + j])); + if j < n - 1 { + adj.push(' '); + } + } + adj.push('\n'); + } + adj + } + + /// Returns the graph in a flat adjacency matrix + fn to_flat(&self) -> String { + let n = self.size(); + let bit_vec = self.bit_vec(); + + let mut flat = String::new(); + for i in 0..n { + for j in 0..n { + flat.push_str(&format!("{}", bit_vec[i * n + j])); + } + } + flat + } + + /// Returns the graph in the Pajek NET format + fn to_net(&self) -> String { + let n = self.size(); + let bit_vec = self.bit_vec(); + + let mut net = String::new(); + net.push_str(&format!("*Vertices {}\n", n)); + for i in 0..n { + net.push_str(&format!("{} \"{}\"\n", i + 1, i)); + } + net.push_str("*Arcs\n"); + for i in 0..n { + for j in 0..n { + if bit_vec[i * n + j] == 1 { + net.push_str(&format!("{} {}\n", i + 1, j + 1)); + } + } + } + net + } +} + +/// IO / parsing errors +#[derive(Debug, PartialEq, Eq)] +pub enum IOError { + InvalidDigraphHeader, + InvalidSizeChar, + GraphTooLarge, + InvalidAdjacencyMatrix, + NonCanonicalEncoding, +} + +// --------------------------------------------------------------------------- +// Shared size (N(n)) encoding/decoding and bit-width helper used by graph6, +// digraph6, and sparse6. Centralizing here avoids divergence in canonical +// encoding rules and bound checks. The formats share identical size rules. +// --------------------------------------------------------------------------- + +/// Trait encapsulating graph size field (N(n)) codec. +pub trait SizeCodec { + /// Encode a vertex count `n` into its canonical representation as 63-offset bytes. + fn encode_size(n: usize) -> Result, IOError>; + /// Decode size field at position `pos`, returning (n, bytes_consumed). + fn decode_size(bytes: &[u8], pos: usize) -> Result<(usize, usize), IOError>; + /// Compute number of bits needed to represent integers in [0, n-1]. (R(x) in spec) + fn needed_bits(n: usize) -> usize { + if n <= 1 { + 0 + } else { + (usize::BITS - (n - 1).leading_zeros()) as usize + } + } +} + +/// Concrete codec implementation shared across formats. +pub struct GraphNumberCodec; + +impl GraphNumberCodec { + #[inline] + fn validate(n: usize) -> Result<(), IOError> { + if n >= (1usize << 36) { + return Err(IOError::GraphTooLarge); + } + Ok(()) + } +} + +impl SizeCodec for GraphNumberCodec { + fn encode_size(n: usize) -> Result, IOError> { + Self::validate(n)?; + let mut out = Vec::with_capacity(8); + if n < 63 { + out.push((n as u8) + 63); + } else if n < (1 << 18) { + out.push(b'~'); + let mut v = n as u32; + let mut parts = [0u8; 3]; + for i in (0..3).rev() { + parts[i] = (v & 0x3F) as u8; + v >>= 6; + } + out.extend(parts.iter().map(|p| p + 63)); + } else { + out.push(b'~'); + out.push(b'~'); + let mut v = n as u64; + let mut parts = [0u8; 6]; + for i in (0..6).rev() { + parts[i] = (v & 0x3F) as u8; + v >>= 6; + } + out.extend(parts.iter().map(|p| p + 63)); + } + Ok(out) + } + + fn decode_size(bytes: &[u8], pos: usize) -> Result<(usize, usize), IOError> { + let first = *bytes.get(pos).ok_or(IOError::InvalidSizeChar)?; + if first == b'~' { + let second = *bytes.get(pos + 1).ok_or(IOError::InvalidSizeChar)?; + if second == b'~' { + // long form: '~~' + 6 chars + let mut val: u64 = 0; + for i in 0..6 { + let c = *bytes.get(pos + 2 + i).ok_or(IOError::InvalidSizeChar)?; + if c < 63 { + return Err(IOError::InvalidSizeChar); + } + val = (val << 6) | ((c - 63) as u64); + } + if val >= (1u64 << 36) { + return Err(IOError::GraphTooLarge); + } + if val < (1 << 18) { + return Err(IOError::NonCanonicalEncoding); + } + Ok((val as usize, 8)) + } else { + // medium form: '~' + 3 chars + let mut val: u32 = 0; + for i in 0..3 { + let c = *bytes.get(pos + 1 + i).ok_or(IOError::InvalidSizeChar)?; + if c < 63 { + return Err(IOError::InvalidSizeChar); + } + val = (val << 6) | ((c - 63) as u32); + } + if val < 63 { + return Err(IOError::NonCanonicalEncoding); + } + Ok((val as usize, 4)) + } + } else { + if first < 63 { + return Err(IOError::InvalidSizeChar); + } + let n = (first - 63) as usize; + if n >= 63 { + return Err(IOError::NonCanonicalEncoding); + } + Ok((n, 1)) + } + } +} + +impl From for PyErr { + fn from(e: IOError) -> PyErr { + match e { + IOError::InvalidDigraphHeader => Graph6ParseError::new_err("Invalid digraph header"), + IOError::InvalidSizeChar => { + Graph6ParseError::new_err("Invalid size character in header") + } + IOError::GraphTooLarge => { + Graph6OverflowError::new_err("Graph too large for graph6 encoding") + } + IOError::InvalidAdjacencyMatrix => { + Graph6ParseError::new_err("Invalid adjacency matrix") + } + IOError::NonCanonicalEncoding => { + Graph6ParseError::new_err("Non-canonical graph6 encoding") + } + } + } +} + +/// Utility functions used by parsers and writers +pub mod utils { + use super::IOError; + use super::{GraphNumberCodec, SizeCodec}; + + /// Iterates through the bytes of a graph and fills a bitvector representing + /// the adjacency matrix of the graph + pub fn fill_bitvector(bytes: &[u8], size: usize, offset: usize) -> Option> { + let mut bit_vec = Vec::with_capacity(size); + let mut pos = 0; + for b in bytes.iter().skip(offset) { + let b = b.checked_sub(63)?; + for i in 0..6 { + let bit = (b >> (5 - i)) & 1; + bit_vec.push(bit as usize); + pos += 1; + if pos == size { + break; + } + } + } + Some(bit_vec) + } + + /// Parse the size field (n) from a graph6/digraph6 string starting at `pos`. + /// Returns (n, bytes_consumed_for_size_field). + /// Supports the standard forms: + /// - single char: n < 63, encoded as n + 63 + /// - '~' + 3 chars: 63 <= n < 2^18 (except values whose top 6 bits are all 1, to avoid ambiguity with long form) + /// - '~~' + 6 chars: remaining values up to < 2^36 + pub fn parse_size(bytes: &[u8], pos: usize) -> Result<(usize, usize), IOError> { + GraphNumberCodec::decode_size(bytes, pos) + } + + /// Returns the upper triangle of a bitvector + pub fn upper_triangle(bit_vec: &[usize], n: usize) -> Vec { + let mut tri = Vec::with_capacity(n * (n - 1) / 2); + for i in 1..n { + for j in 0..i { + let idx = i * n + j; + tri.push(bit_vec[idx]) + } + } + tri + } +} + +/// Graph6 writer utilities +pub mod write { + use super::utils::upper_triangle; + use super::GraphConversion; + use super::IOError; + use super::{GraphNumberCodec, SizeCodec}; + + /// Trait to write graphs into graph 6 formatted strings + pub trait WriteGraph: GraphConversion { + fn write_graph(&self) -> Result { + to_file(self.bit_vec().to_vec(), self.size(), self.is_directed()) + } + } + + fn write_header(repr: &mut String, is_directed: bool) { + if is_directed { + repr.push('&'); + } + } + + fn write_size(repr: &mut String, size: usize) -> Result<(), IOError> { + let enc = GraphNumberCodec::encode_size(size)?; + for b in enc { + repr.push(b as char); + } + Ok(()) + } + + fn pad_bitvector(bit_vec: &mut Vec) { + if bit_vec.len() % 6 != 0 { + (0..6 - (bit_vec.len() % 6)).for_each(|_| bit_vec.push(0)); + } + } + + fn parse_bitvector(bit_vec: &[usize], repr: &mut String) -> Result<(), IOError> { + for chunk in bit_vec.chunks(6) { + let mut sum = 0; + for (i, bit) in chunk.iter().rev().enumerate() { + sum += bit * 2usize.pow(i as u32); + } + let raw = sum as u32 + 63; + let c = char::from_u32(raw).ok_or(IOError::InvalidSizeChar)?; + repr.push(c); + } + Ok(()) + } + + pub fn to_file(bit_vec: Vec, n: usize, is_directed: bool) -> Result { + // enforce graph6 maximum (2^36 - 1) like sparse6 + if n >= (1usize << 36) { + return Err(IOError::GraphTooLarge); + } + let mut repr = String::new(); + let mut bit_vec = if !is_directed { + if n < 2 { + // For n=0 or n=1, upper triangle is empty. + // This avoids an underflow in upper_triangle. + Vec::new() + } else { + upper_triangle(&bit_vec, n) + } + } else { + bit_vec + }; + write_header(&mut repr, is_directed); + write_size(&mut repr, n)?; + pad_bitvector(&mut bit_vec); + parse_bitvector(&bit_vec, &mut repr)?; + Ok(repr) + } +} + +// WriteGraph is only used in tests via the tests module's imports + +use crate::get_edge_iter_with_weights; +use crate::{graph::PyGraph, StablePyGraph}; +use crate::{Graph6OverflowError, Graph6ParseError}; +use flate2::write::GzEncoder; +use flate2::Compression; +use petgraph::graph::NodeIndex; +use petgraph::prelude::*; +use pyo3::prelude::*; +use pyo3::PyErr; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +/// Undirected graph implementation +#[derive(Debug)] +pub struct Graph6 { + pub bit_vec: Vec, + pub n: usize, +} +impl Graph6 { + /// Creates a new undirected graph from a graph6 representation + pub fn from_g6(repr: &str) -> Result { + let bytes = repr.as_bytes(); + let (n, n_len) = utils::parse_size(bytes, 0)?; + let bit_vec = Self::build_bitvector(bytes, n, n_len)?; + Ok(Self { bit_vec, n }) + } + + /// Creates a new undirected graph from a flattened adjacency matrix. + /// The adjacency matrix must be square. + /// The adjacency matrix will be forced into a symmetric matrix. + pub fn from_adj(adj: &[usize]) -> Result { + let n2 = adj.len(); + let n = (n2 as f64).sqrt() as usize; + if n * n != n2 { + return Err(IOError::InvalidAdjacencyMatrix); + } + let mut bit_vec = vec![0; n * n]; + for i in 0..n { + for j in 0..n { + if adj[i * n + j] == 1 { + let idx = i * n + j; + let jdx = j * n + i; + bit_vec[idx] = 1; + bit_vec[jdx] = 1; + } + } + } + Ok(Self { bit_vec, n }) + } + + /// Builds the bitvector from the graph6 representation + fn build_bitvector(bytes: &[u8], n: usize, n_len: usize) -> Result, IOError> { + // For n < 2 we still materialize an n*n bitvector (0-length for n=0, length 1 for n=1) + // to avoid downstream index calculations (i * n + j) from panicking in utility + // functions (DOT conversion, PyGraph conversion, etc.). + if n < 2 { + return Ok(vec![0; n * n]); + } + let bv_len = n * (n - 1) / 2; + let Some(bit_vec) = utils::fill_bitvector(bytes, bv_len, n_len) else { + return Err(IOError::NonCanonicalEncoding); + }; + Self::fill_from_triangle(&bit_vec, n) + } + + /// Fills the adjacency bitvector from an upper triangle + fn fill_from_triangle(tri: &[usize], n: usize) -> Result, IOError> { + let mut bit_vec = vec![0; n * n]; + let mut tri_iter = tri.iter(); + for i in 1..n { + for j in 0..i { + let idx = i * n + j; + let jdx = j * n + i; + let Some(&val) = tri_iter.next() else { + return Err(IOError::NonCanonicalEncoding); + }; + bit_vec[idx] = val; + bit_vec[jdx] = val; + } + } + Ok(bit_vec) + } +} +impl GraphConversion for Graph6 { + fn bit_vec(&self) -> &[usize] { + &self.bit_vec + } + + fn size(&self) -> usize { + self.n + } + + fn is_directed(&self) -> bool { + false + } +} +impl write::WriteGraph for Graph6 {} + +use crate::digraph6::{digraph6_to_pydigraph, DiGraph6}; + +// End of combined module + +/// Convert internal Graph (undirected) to PyGraph +fn graph6_to_pygraph<'py>(py: Python<'py>, g: &Graph6) -> PyResult> { + let mut graph = StablePyGraph::::with_capacity(g.size(), 0); + if g.bit_vec.len() < g.size().saturating_mul(g.size()) { + return Err(Graph6ParseError::new_err( + "Bitvector shorter than n*n; invalid internal state", + )); + } + // add nodes + for _ in 0..g.size() { + graph.add_node(py.None()); + } + // add edges + for i in 0..g.size() { + for j in i..g.size() { + if g.bit_vec[i * g.size() + j] == 1 { + let u = NodeIndex::new(i); + let v = NodeIndex::new(j); + graph.add_edge(u, v, py.None()); + } + } + } + let out = PyGraph { + graph, + node_removed: false, + multigraph: true, + attrs: py.None(), + }; + Ok(out.into_pyobject(py)?.into_any()) +} + +// digraph_to_pydigraph provided by crate::digraph6 + +/// Write a graph6 string to a file path. Supports gzip if the extension is `.gz`. +pub(crate) fn to_file(path: impl AsRef, content: &str) -> std::io::Result<()> { + let extension = path + .as_ref() + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + if extension == "gz" { + let file = File::create(path)?; + let buf_writer = BufWriter::new(file); + let mut encoder = GzEncoder::new(buf_writer, Compression::default()); + encoder.write_all(content.as_bytes())?; + encoder.finish()?; + } else { + std::fs::write(path, content)?; + } + Ok(()) +} + +#[pyfunction] +#[pyo3(signature=(repr))] +/// Parse a single graph6 or digraph6 text line and return a PyGraph (undirected) +/// or PyDiGraph (directed). Automatically detects directed form starting with +/// '&'. Raises Graph6ParseError / Graph6OverflowError derived python errors for +/// malformed or non‑canonical encodings. +pub fn read_graph6_str<'py>(py: Python<'py>, repr: &str) -> PyResult> { + enum ParserResult { + Graph6(Graph6), + DiGraph6(DiGraph6), + } + + let result = if let Ok(g) = Graph6::from_g6(repr) { + Ok(ParserResult::Graph6(g)) + } else if let Ok(dg) = DiGraph6::from_d6(repr) { + Ok(ParserResult::DiGraph6(dg)) + } else { + Err(IOError::NonCanonicalEncoding) + }; + + match result { + Ok(ParserResult::Graph6(g)) => graph6_to_pygraph(py, &g), + Ok(ParserResult::DiGraph6(dg)) => digraph6_to_pydigraph(py, &dg), + Err(io_err) => Err(PyErr::from(io_err)), + } +} + +#[pyfunction] +#[pyo3(signature=(pygraph))] +/// Encode an undirected `PyGraph` into a graph6 ASCII string (no trailing +/// newline). Fails with Graph6OverflowError if the graph has >= 2^36 nodes. +/// Self‑loops and multi edges are ignored; only simple adjacency is encoded. +pub fn graph_write_graph6_to_str<'py>(py: Python<'py>, pygraph: Py) -> PyResult { + let g = pygraph.borrow(py); + let n = g.graph.node_count(); + if n >= (1usize << 36) { + return Err(Graph6OverflowError::new_err( + "Graph too large for graph6 encoding", + )); + } + // build bit_vec + let mut bit_vec = vec![0usize; n * n]; + for (i, j, _w) in get_edge_iter_with_weights(&g.graph) { + bit_vec[i * n + j] = 1; + bit_vec[j * n + i] = 1; + } + let graph6 = write::to_file(bit_vec, n, false)?; + Ok(graph6) +} + +/// Parse the size field of a graph6/digraph6 string. +/// +/// Returns (n, length_of_size_field). For digraph6 pass offset=1 to skip '&'. +/// Enforces canonical (shortest) encoding per the official specification. +/// Errors if n >= 2^36 or the size field uses a non‑minimal form. +#[pyfunction] +#[pyo3(signature=(data, offset=0))] +pub fn parse_graph6_size(data: &str, offset: usize) -> PyResult<(usize, usize)> { + let bytes = data.as_bytes(); + let (n, consumed) = utils::parse_size(bytes, offset)?; + Ok((n, consumed)) +} + +/// Read a graph6 file from disk and return a PyGraph or PyDiGraph +#[pyfunction] +#[pyo3(signature=(path))] +/// Read the first non‑empty line from a file (optionally gzip if handled +/// externally) and parse it as graph6 or digraph6, returning a PyGraph or +/// PyDiGraph. Ignores additional lines. Designed for single‑graph files. +pub fn read_graph6<'py>(py: Python<'py>, path: &str) -> PyResult> { + use std::fs; + let data = fs::read_to_string(path) + .map_err(|e| PyErr::new::(format!("IO error: {}", e)))?; + // graph6 files may contain newlines; take first non-empty line + let line = data.lines().find(|l| !l.trim().is_empty()).unwrap_or(""); + read_graph6_str(py, line) +} + +/// Write a PyGraph to a graph6 file +#[pyfunction] +#[pyo3(signature=(graph, path))] +/// Write a `PyGraph` to a file in graph6 format. Supports gzip output when the +/// path ends with `.gz`. Overwrites existing files. Returns IOError on I/O +/// failure and Graph6OverflowError if n >= 2^36. +pub fn graph_write_graph6(py: Python<'_>, graph: Py, path: &str) -> PyResult<()> { + let s = graph_write_graph6_to_str(py, graph)?; + to_file(path, &s) + .map_err(|e| PyErr::new::(format!("IO error: {}", e)))?; + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index 1b352d28f9..8a09877921 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,10 +17,12 @@ mod coloring; mod connectivity; mod dag_algo; mod digraph; +mod digraph6; mod dominance; mod dot_utils; mod generators; mod graph; +mod graph6; mod graphml; mod isomorphism; mod iterators; @@ -33,6 +35,7 @@ mod planar; mod random_graph; mod score; mod shortest_path; +mod sparse6; mod steiner_tree; mod tensor_product; mod token_swapper; @@ -48,13 +51,16 @@ use centrality::*; use coloring::*; use connectivity::*; use dag_algo::*; +use digraph6::*; use dominance::*; +use graph6::*; use graphml::*; use isomorphism::*; use json::*; use layout::*; use line_graph::*; use link_analysis::*; +use sparse6::*; use matching::*; use planar::*; @@ -456,6 +462,37 @@ create_exception!( "Graph is not bipartite" ); +create_exception!( + rustworkx, + Graph6Error, + PyException, + "Base exception for graph6/digraph6/sparse6 parsing and formatting" +); +create_exception!( + rustworkx, + Graph6ParseError, + Graph6Error, + "Parser error when reading graph6/digraph6 strings" +); +create_exception!( + rustworkx, + Graph6OverflowError, + Graph6Error, + "Graph too large for graph6 encoding" +); +create_exception!( + rustworkx, + Graph6PanicError, + Graph6Error, + "Unexpected Rust panic during graph6/digraph6 parsing" +); +create_exception!( + rustworkx, + Sparse6Unsupported, + Graph6Error, + "sparse6 parsing not implemented" +); + #[pymodule] fn rustworkx(py: Python<'_>, m: &Bound) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; @@ -478,6 +515,11 @@ fn rustworkx(py: Python<'_>, m: &Bound) -> PyResult<()> { "JSONDeserializationError", py.get_type::(), )?; + m.add("Graph6Error", py.get_type::())?; + m.add("Graph6ParseError", py.get_type::())?; + m.add("Graph6OverflowError", py.get_type::())?; + m.add("Graph6PanicError", py.get_type::())?; + m.add("Sparse6Unsupported", py.get_type::())?; m.add_wrapped(wrap_pyfunction!(bfs_successors))?; m.add_wrapped(wrap_pyfunction!(bfs_predecessors))?; m.add_wrapped(wrap_pyfunction!(graph_bfs_search))?; @@ -673,6 +715,15 @@ fn rustworkx(py: Python<'_>, m: &Bound) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(read_graphml))?; m.add_wrapped(wrap_pyfunction!(graph_write_graphml))?; m.add_wrapped(wrap_pyfunction!(digraph_write_graphml))?; + m.add_wrapped(wrap_pyfunction!(read_graph6_str))?; + m.add_wrapped(wrap_pyfunction!(graph_write_graph6_to_str))?; + m.add_wrapped(wrap_pyfunction!(digraph_write_graph6_to_str))?; + m.add_wrapped(wrap_pyfunction!(read_graph6))?; + m.add_wrapped(wrap_pyfunction!(graph_write_graph6))?; + m.add_wrapped(wrap_pyfunction!(digraph_write_graph6))?; + m.add_wrapped(wrap_pyfunction!(parse_graph6_size))?; + m.add_wrapped(wrap_pyfunction!(read_sparse6_str))?; + m.add_wrapped(wrap_pyfunction!(graph_write_sparse6_to_str))?; m.add_wrapped(wrap_pyfunction!(digraph_node_link_json))?; m.add_wrapped(wrap_pyfunction!(graph_node_link_json))?; m.add_wrapped(wrap_pyfunction!(from_node_link_json_file))?; diff --git a/src/sparse6.rs b/src/sparse6.rs new file mode 100644 index 0000000000..1623bebd89 --- /dev/null +++ b/src/sparse6.rs @@ -0,0 +1,232 @@ +use crate::graph::PyGraph; +use crate::graph6::{GraphNumberCodec, IOError, SizeCodec}; +use crate::StablePyGraph; +use petgraph::graph::NodeIndex; +use petgraph::prelude::Undirected; +use pyo3::prelude::*; +use pyo3::types::PyAny; +use std::iter; + +/// Parse an n value from a sparse6 stream using the shared GraphNumberCodec. +/// Returns (n, absolute next byte position). Enforces canonical size encoding +/// and raises GraphTooLarge if n >= 2^36. +fn parse_n(bytes: &[u8], pos: usize) -> Result<(usize, usize), IOError> { + let (n, consumed) = GraphNumberCodec::decode_size(bytes, pos)?; + Ok((n, pos + consumed)) +} + +/// Encode an undirected graph adjacency matrix (bit_vec of length n*n) into +/// sparse6 bytes. If `header` is true the ">>sparse6<<" marker is prepended. +/// Applies canonical padding rules and returns a newline terminated buffer. +fn to_sparse6_bytes(bit_vec: &[usize], n: usize, header: bool) -> Result, IOError> { + // Unified bound check occurs inside GraphNumberCodec::encode_size too, but keep for clarity. + if n >= (1usize << 36) { + return Err(IOError::GraphTooLarge); + } + let mut out: Vec = Vec::new(); + if header { + out.extend_from_slice(b">>sparse6<<"); + } + out.push(b':'); + let size_enc = GraphNumberCodec::encode_size(n)?; + out.extend_from_slice(&size_enc); + + // compute k + let mut k = 1usize; + while (1usize << k) < n { + k += 1; + } + + // Build edges from bit_vec + let mut edges: Vec<(usize, usize)> = Vec::new(); + for i in 0..n { + for j in 0..=i { + if bit_vec[i * n + j] == 1 { + edges.push((i, j)); + } + } + } + // edges should be sorted by (v=max, u=min) + edges.sort_by_key(|(a, b)| (*a, *b)); + + let mut bits: Vec = Vec::new(); + let mut curv = 0usize; + for (v, u) in edges.iter() { + let v = *v; + let u = *u; + if v == curv { + bits.push(0); + for i in (0..k).rev() { + bits.push(((u >> i) & 1) as u8); + } + } else if v == curv + 1 { + curv += 1; + bits.push(1); + for i in (0..k).rev() { + bits.push(((u >> i) & 1) as u8); + } + } else { + curv = v; + bits.push(1); + for i in (0..k).rev() { + bits.push(((v >> i) & 1) as u8); + } + bits.push(0); + for i in (0..k).rev() { + bits.push(((u >> i) & 1) as u8); + } + } + } + + // padding: canonical calculation + let pad = (6 - (bits.len() % 6)) % 6; + if k < 6 && n == (1 << k) && pad >= k && curv < (n - 1) { + // special-case: prepend a 0 then pad with 1s + bits.push(0); + } + bits.extend(iter::repeat(1u8).take(pad)); + + // pack into 6-bit chars + for chunk in bits.chunks(6) { + let mut val = 0u8; + for b in chunk.iter() { + val = (val << 1) | (b & 1); + } + out.push(val + 63); + } + out.push(b'\n'); + Ok(out) +} + +#[pyfunction] +#[pyo3(signature=(pygraph, header=true))] +/// Encode a `PyGraph` to sparse6 format and return the ASCII string. When +/// `header` is true the standard ">>sparse6<<:" header is included. Fails on +/// non‑canonical or oversized graphs (n >= 2^36). Ignores parallel edges. +pub fn graph_write_sparse6_to_str<'py>( + py: Python<'py>, + pygraph: Py, + header: bool, +) -> PyResult { + let g = pygraph.borrow(py); + let n = g.graph.node_count(); + let mut bit_vec = vec![0usize; n * n]; + for (i, j, _w) in crate::get_edge_iter_with_weights(&g.graph) { + bit_vec[i * n + j] = 1; + bit_vec[j * n + i] = 1; + } + let bytes = to_sparse6_bytes(&bit_vec, n, header).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("sparse6 encode error: {:?}", e)) + })?; + // convert bytes to string + let s = String::from_utf8(bytes) + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("utf8: {}", e)))?; + Ok(s) +} + +#[pyfunction] +#[pyo3(signature=(repr))] +/// Parse a sparse6 string (optionally containing the standard header) into a +/// `PyGraph`. Accepts trailing newline, tolerates leading ':' or ';'. Performs +/// bounds and character validation and converts Rust panics into Python errors. +pub fn read_sparse6_str<'py>(py: Python<'py>, repr: &str) -> PyResult> { + let s_trim = repr.trim_end_matches('\n'); + if s_trim.is_empty() { + return Err(PyErr::from(IOError::NonCanonicalEncoding)); + } + + let wrapped = std::panic::catch_unwind(|| { + // Accept optional leading ':' or ';' for incremental form + let mut s = s_trim.as_bytes(); + if s.starts_with(b">>sparse6<<:") { + s = &s[12..]; + } + let mut pos = 0usize; + if s.len() > 0 && (s[0] == b';' || s[0] == b':') { + pos = 1; + } + + // Parse N(n) (returns absolute next index) + let (n, pos_after) = parse_n(s, pos)?; + let pos = pos_after; + + // compute k = bits needed to represent n-1 + let k = if n <= 1 { + 0 + } else { + (usize::BITS - (n - 1).leading_zeros()) as usize + }; + if pos >= s.len() { + return Ok::<(Vec<(usize, usize)>, usize), IOError>((Vec::new(), n)); + } + // let bits = bits_from_bytes(s, pos)?; + let mut bits = Vec::new(); + for &b in s.iter().skip(pos) { + if b < 63 || b > 126 { + return Err(IOError::InvalidSizeChar); + } + let val = b - 63; + for i in 0..6 { + let bit = (val >> (5 - i)) & 1; + bits.push(bit); + } + } + let mut idx = 0usize; + let mut v: usize = 0; + let mut edges: Vec<(usize, usize)> = Vec::new(); + while idx + 1 + k <= bits.len() { + let b = bits[idx]; + idx += 1; + let mut x: usize = 0; + for _ in 0..k { + x = (x << 1) | (bits[idx] as usize); + idx += 1; + } + if b == 1 { + v = v.saturating_add(1); + } + if x > v { + v = x; + } else if x < v && x < n && v < n { + edges.push((x, v)); + } + if idx < bits.len() && bits[idx..].iter().all(|&b| b == 1) { + break; + } + } + Ok((edges, n)) + }); + + match wrapped { + Ok(Ok((edges, n))) => { + // convert to PyGraph + let mut graph = StablePyGraph::::with_capacity(n, 0); + for _ in 0..n { + graph.add_node(py.None()); + } + for (u, v) in edges { + graph.add_edge(NodeIndex::new(u), NodeIndex::new(v), py.None()); + } + let out = PyGraph { + graph, + node_removed: false, + multigraph: true, + attrs: py.None(), + }; + Ok(out.into_pyobject(py)?.into_any()) + } + Ok(Err(io_err)) => Err(PyErr::from(io_err)), + Err(panic_payload) => { + let msg = if let Some(s) = panic_payload.downcast_ref::<&str>() { + format!("Rust panic in sparse6 parser: {}", s) + } else if let Some(s) = panic_payload.downcast_ref::() { + format!("Rust panic in sparse6 parser: {}", s) + } else { + "Rust panic in sparse6 parser (non-string payload)".to_string() + }; + Err(crate::Graph6PanicError::new_err(msg)) + } + } +} + +// NOTE: 4-byte form currently misinterprets because spec is 126 + three 6-bit chars, not including the second byte in prior logic. diff --git a/tests/test_digraph6.py b/tests/test_digraph6.py new file mode 100644 index 0000000000..f443dab4cb --- /dev/null +++ b/tests/test_digraph6.py @@ -0,0 +1,43 @@ +import unittest +import rustworkx as rx + + +class TestDigraph6Format(unittest.TestCase): + def test_roundtrip_small_directed(self): + g = rx.PyDiGraph() + g.add_nodes_from([None, None]) + g.add_edge(0, 1, None) + s = rx.digraph_write_graph6_to_str(g) + new_g = rx.read_graph6_str(s) + self.assertIsInstance(new_g, rx.PyDiGraph) + self.assertEqual(new_g.num_nodes(), 2) + self.assertEqual(new_g.num_edges(), 1) + + def test_asymmetric_two_edge(self): + g = rx.PyDiGraph() + g.add_nodes_from([None, None]) + g.add_edges_from([(0, 1, None), (1, 0, None)]) + s = rx.digraph_write_graph6_to_str(g) + new_g = rx.read_graph6_str(s) + self.assertIsInstance(new_g, rx.PyDiGraph) + self.assertEqual(new_g.num_edges(), 2) + + def test_file_roundtrip_directed(self): + import tempfile, pathlib + g = rx.PyDiGraph() + g.add_nodes_from([None, None, None]) + g.add_edges_from([(0, 1, None), (1, 2, None)]) + with tempfile.TemporaryDirectory() as td: + p = pathlib.Path(td) / 'd.d6' + rx.digraph_write_graph6(g, str(p)) + g2 = rx.read_graph6(str(p)) + self.assertIsInstance(g2, rx.PyDiGraph) + self.assertEqual(g2.num_nodes(), 3) + self.assertEqual(g2.num_edges(), 2) + + def test_invalid_string(self): + # Rust implementation may panic on malformed input; accept any + # raised BaseException (including the pyo3 PanicException wrapper). + with self.assertRaises(BaseException): + rx.read_graph6_str('&invalid') + diff --git a/tests/test_graph6.py b/tests/test_graph6.py new file mode 100644 index 0000000000..f444661a0a --- /dev/null +++ b/tests/test_graph6.py @@ -0,0 +1,263 @@ +import tempfile +import rustworkx as rx +import unittest +import os +import gzip # added for gzip file write tests + + +class TestGraph6(unittest.TestCase): + def _build_two_node_graph(self): + # Use path_graph(2) which yields a single edge between two nodes. + return rx.generators.path_graph(2) + def test_graph6_roundtrip(self): + # build a small graph with node/edge attrs + g = rx.PyGraph() + g.add_node({"label": "n0"}) + g.add_node({"label": "n1"}) + g.add_edge(0, 1, {"weight": 3}) + + # Use NamedTemporaryFile with context-managed cleanup + with tempfile.NamedTemporaryFile() as fd: + rx.graph_write_graph6(g, fd.name) + g2 = rx.read_graph6(fd.name) + self.assertIsInstance(g2, rx.PyGraph) + self.assertEqual(g2.num_nodes(), 2) + self.assertEqual(g2.num_edges(), 1) + n0 = g2[0] + self.assertTrue(n0 is None or ("label" in n0 and n0["label"] == "n0")) + self.assertTrue(list(g2.edge_list())) + + def test_read_graph6_str_undirected(self): + """Test reading an undirected graph from a graph6 string.""" + g6_str = "A_" + graph = rx.read_graph6_str(g6_str) + self.assertIsInstance(graph, rx.PyGraph) + self.assertEqual(graph.num_nodes(), 2) + self.assertEqual(graph.num_edges(), 1) + self.assertTrue(graph.has_edge(0, 1)) + + def test_read_graph6_str_directed(self): + """Test reading a directed graph from a graph6 string.""" + g6_str = "&AG" + graph = rx.read_graph6_str(g6_str) + self.assertIsInstance(graph, rx.PyDiGraph) + self.assertEqual(graph.num_nodes(), 2) + self.assertEqual(graph.num_edges(), 1) + self.assertTrue(graph.has_edge(1, 0)) + + def test_graph_write_graph6_to_str(self): + """Test writing a PyGraph to a graph6 string.""" + graph = rx.generators.path_graph(2) + g6_str = rx.graph_write_graph6_to_str(graph) + self.assertEqual(g6_str, "A_") + + def test_digraph_write_graph6_to_str(self): + """Test writing a PyDiGraph to a graph6 string.""" + # directed_path_graph(2) yields edge 0->1; we need 1->0 so build via generators then reverse + base = rx.generators.directed_path_graph(2) + graph = rx.PyDiGraph() + graph.add_nodes_from(range(base.num_nodes())) + # Add reversed edge to match expected encoding &AG (edge 1->0) + graph.add_edge(1, 0, None) + g6_str = rx.digraph_write_graph6_to_str(graph) + self.assertEqual(g6_str, "&AG") + + def test_roundtrip_undirected(self): + """Test roundtrip for an undirected graph.""" + graph = rx.generators.path_graph(4) + g6_str = rx.graph_write_graph6_to_str(graph) + new_graph = rx.read_graph6_str(g6_str) + self.assertEqual(graph.num_nodes(), new_graph.num_nodes()) + self.assertEqual(graph.num_edges(), new_graph.num_edges()) + self.assertEqual(graph.edge_list(), new_graph.edge_list()) + + def test_roundtrip_directed(self): + """Test roundtrip for a directed graph.""" + graph = rx.generators.directed_path_graph(4) + g6_str = rx.digraph_write_graph6_to_str(graph) + new_graph = rx.read_graph6_str(g6_str) + self.assertEqual(graph.num_nodes(), new_graph.num_nodes()) + self.assertEqual(graph.num_edges(), new_graph.num_edges()) + self.assertEqual(graph.edge_list(), new_graph.edge_list()) + + def test_read_graph6(self): + """Test reading a graph from a graph6 file.""" + with tempfile.NamedTemporaryFile(mode="w+") as fd: + fd.write("C~\n") + fd.flush() + graph = rx.read_graph6(fd.name) + self.assertIsInstance(graph, rx.PyGraph) + self.assertEqual(graph.num_nodes(), 4) + self.assertEqual(graph.num_edges(), 6) # K4 + + def test_graph_write_graph6(self): + """Test writing a PyGraph to a graph6 file.""" + graph = rx.generators.complete_graph(4) + with tempfile.NamedTemporaryFile() as fd: + rx.graph_write_graph6(graph, fd.name) + with open(fd.name, "r") as f: + content = f.read() + self.assertEqual(content, "C~") + + def test_digraph_write_graph6(self): + """Test writing a PyDiGraph to a graph6 file.""" + graph = rx.PyDiGraph() + graph.add_nodes_from(range(3)) + graph.add_edges_from([(0, 1, None), (1, 2, None), (2, 0, None)]) + with tempfile.NamedTemporaryFile() as fd: + rx.digraph_write_graph6(graph, fd.name) + new_graph = rx.read_graph6(fd.name) + self.assertTrue(rx.is_isomorphic(graph, new_graph)) + + def test_invalid_graph6_string(self): + """Test that an invalid graph6 string raises an error.""" + with self.assertRaises(Exception): + rx.read_graph6_str("invalid_string") + + def test_empty_graph(self): + """Test writing and reading an empty graph.""" + graph = rx.PyGraph() + g6_str = rx.graph_write_graph6_to_str(graph) + new_graph = rx.read_graph6_str(g6_str) + self.assertEqual(new_graph.num_nodes(), 0) + self.assertEqual(new_graph.num_edges(), 0) + + def test_graph_with_no_edges(self): + """Test a graph with nodes but no edges.""" + graph = rx.PyGraph() + graph.add_nodes_from(range(5)) + g6_str = rx.graph_write_graph6_to_str(graph) + new_graph = rx.read_graph6_str(g6_str) + self.assertEqual(new_graph.num_nodes(), 5) + self.assertEqual(new_graph.num_edges(), 0) + + def test_write_plain_file(self): + g = self._build_two_node_graph() + expected = "A_" # known graph6 for 2-node single edge + with tempfile.NamedTemporaryFile(suffix=".g6") as fd: + rx.graph_write_graph6(g, fd.name) + with open(fd.name, "rt", encoding="ascii") as fh: + content = fh.read().strip() + self.assertEqual(expected, content) + + def test_write_gzip_file(self): + g = self._build_two_node_graph() + expected = "A_" + with tempfile.NamedTemporaryFile(suffix=".g6.gz") as fd: + rx.graph_write_graph6(g, fd.name) + with gzip.open(fd.name, "rt", encoding="ascii") as fh: + content = fh.read().strip() + self.assertEqual(expected, content) + + +class TestGraph6FormatExtras(unittest.TestCase): + def test_roundtrip_small_undirected(self): + g = rx.PyGraph() + g.add_nodes_from([None, None]) + g.add_edge(0, 1, None) + s = rx.graph_write_graph6_to_str(g) + new_g = rx.read_graph6_str(s) + self.assertIsInstance(new_g, rx.PyGraph) + self.assertEqual(new_g.num_nodes(), 2) + self.assertEqual(new_g.num_edges(), 1) + + def test_write_and_read_triangle(self): + g = rx.PyGraph() + g.add_nodes_from([None, None, None]) + g.add_edges_from([(0, 1, None), (1, 2, None), (0, 2, None)]) + s = rx.graph_write_graph6_to_str(g) + new_g = rx.read_graph6_str(s) + self.assertIsInstance(new_g, rx.PyGraph) + self.assertEqual(new_g.num_nodes(), 3) + self.assertEqual(new_g.num_edges(), 3) + + def test_file_roundtrip_format(self): + import tempfile, pathlib + g = rx.PyGraph() + g.add_nodes_from([None, None, None, None]) + g.add_edges_from([(0, 1, None), (2, 3, None)]) + s = rx.graph_write_graph6_to_str(g) + with tempfile.TemporaryDirectory() as td: + p = pathlib.Path(td) / 'u.g6' + rx.graph_write_graph6(g, str(p)) + g2 = rx.read_graph6(str(p)) + self.assertIsInstance(g2, rx.PyGraph) + self.assertEqual(g2.num_nodes(), 4) + self.assertEqual(g2.num_edges(), 2) + self.assertEqual(rx.graph_write_graph6_to_str(g2), s) + + def test_invalid_string_format(self): + with self.assertRaises(Exception): + rx.read_graph6_str('invalid_string') + + +# ---- Size parse tests (merged from test_graph6_size_parse.py) ---- + +def _encode_medium(n: int) -> str: + assert 63 <= n < (1 << 18) + parts = [0, 0, 0] + val = n + for i in range(2, -1, -1): + parts[i] = val & 0x3F + val >>= 6 + return "~" + "".join(chr(p + 63) for p in parts) + + +def _encode_long(n: int) -> str: + assert 0 <= n < (1 << 36) + parts = [0] * 6 + val = n + for i in range(5, -1, -1): + parts[i] = val & 0x3F + val >>= 6 + return "~~" + "".join(chr(p + 63) for p in parts) + + +class TestGraph6SizeParse(unittest.TestCase): + def test_parse_short_boundary(self): + n, consumed = rx.parse_graph6_size("}") + self.assertEqual((n, consumed), (62, 1)) + + def test_parse_medium_start(self): + hdr = _encode_medium(63) + n, consumed = rx.parse_graph6_size(hdr) + self.assertEqual((n, consumed), (63, 4)) + + def test_parse_long_start(self): + n_val = 1 << 18 + hdr = _encode_long(n_val) + n, consumed = rx.parse_graph6_size(hdr) + self.assertEqual((n, consumed), (n_val, 8)) + + def test_parse_directed_variants(self): + n, consumed = rx.parse_graph6_size("&}", offset=1) + self.assertEqual((n, consumed), (62, 1)) + hdr = "&" + _encode_medium(63) + n2, consumed2 = rx.parse_graph6_size(hdr, offset=1) + self.assertEqual((n2, consumed2), (63, 4)) + + def test_non_canonical_medium_for_short(self): + n = 62 + val = n + parts = [0, 0, 0] + for i in range(2, -1, -1): + parts[i] = val & 0x3F + val >>= 6 + bad_hdr = "~" + "".join(chr(p + 63) for p in parts) + with self.assertRaises(rx.Graph6ParseError): + rx.parse_graph6_size(bad_hdr) + + # Construct long-form size header for n = 2^36 (one above the allowed max 2^36 - 1). + # Spec requires n < 2^36, so this header must raise an overflow/parse error. + def test_overflow(self): + overflow_val = 1 << 36 + parts = [0] * 6 + # Extract 6-bit chunks of val from least-significant to most (val & 0x3F), shifting right each loop. + # Fill parts right-to-left so the resulting list is big-endian (highest chunk ends up at parts[0]). + val = overflow_val + for i in range(5, -1, -1): + parts[i] = val & 0x3F + val >>= 6 + hdr = "~~" + "".join(chr(p + 63) for p in parts) + with self.assertRaises((rx.Graph6OverflowError, rx.Graph6ParseError)): + rx.parse_graph6_size(hdr) diff --git a/tests/test_sparse6.py b/tests/test_sparse6.py new file mode 100644 index 0000000000..d356be3161 --- /dev/null +++ b/tests/test_sparse6.py @@ -0,0 +1,38 @@ +import unittest +import rustworkx as rx + + +class TestSparse6(unittest.TestCase): + def test_header_only_raises(self): + with self.assertRaises(rx.Graph6Error): + rx.read_sparse6_str('>>sparse6<<:') + + def test_header_with_size_and_no_edges(self): + # n = 1 encoded as '@' (value 1) after header colon + g = rx.read_sparse6_str('>>sparse6<<:@') + self.assertEqual(g.num_nodes(), 1) + self.assertEqual(g.num_edges(), 0) + + def test_empty_string_raises(self): + with self.assertRaises(rx.Graph6Error): + rx.read_sparse6_str('') + + def test_header_with_whitespace_raises(self): + with self.assertRaises(rx.Graph6Error): + rx.read_sparse6_str('>>sparse6<<: ') + + def test_control_chars_in_payload(self): + with self.assertRaises(rx.Graph6Error): + rx.read_sparse6_str('>>sparse6<<:\x00\x01\x02') + + def test_roundtrip_small_graph(self): + g = rx.PyGraph() + for _ in range(4): + g.add_node(None) + g.add_edge(0,1,None) + g.add_edge(2,3,None) + s = rx.graph_write_sparse6_to_str(g, header=False) + g2 = rx.read_sparse6_str(s) + self.assertEqual(g2.num_nodes(), g.num_nodes()) + self.assertEqual(g2.num_edges(), g.num_edges()) +