diff --git a/docker-images/syntax-highlighter/Cargo.Bazel.lock b/docker-images/syntax-highlighter/Cargo.Bazel.lock index a5b88bf47ea4..3a9a35e238f7 100644 --- a/docker-images/syntax-highlighter/Cargo.Bazel.lock +++ b/docker-images/syntax-highlighter/Cargo.Bazel.lock @@ -1,5 +1,5 @@ { - "checksum": "16af7384c373f405b14982ed18dfb8fd320f4c727086dc073cd004d1316c351d", + "checksum": "29abc93382c22b38f87c9f8db7894be1e88ba39e3e45b46fd61a08be146a87e5", "crates": { "addr2line 0.20.0": { "name": "addr2line", @@ -6479,6 +6479,42 @@ }, "license": "MIT OR Apache-2.0" }, + "minimal-lexical 0.2.1": { + "name": "minimal-lexical", + "version": "0.2.1", + "repository": { + "Http": { + "url": "https://static.crates.io/crates/minimal-lexical/0.2.1/download", + "sha256": "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + } + }, + "targets": [ + { + "Library": { + "crate_name": "minimal_lexical", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "minimal_lexical", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "std" + ], + "selects": {} + }, + "edition": "2018", + "version": "0.2.1" + }, + "license": "MIT/Apache-2.0" + }, "miniz_oxide 0.7.1": { "name": "miniz_oxide", "version": "0.7.1", @@ -6797,6 +6833,57 @@ }, "license": "MIT" }, + "nom 7.1.3": { + "name": "nom", + "version": "7.1.3", + "repository": { + "Http": { + "url": "https://static.crates.io/crates/nom/7.1.3/download", + "sha256": "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" + } + }, + "targets": [ + { + "Library": { + "crate_name": "nom", + "crate_root": "src/lib.rs", + "srcs": [ + "**/*.rs" + ] + } + } + ], + "library_target_name": "nom", + "common_attrs": { + "compile_data_glob": [ + "**" + ], + "crate_features": { + "common": [ + "alloc", + "default", + "std" + ], + "selects": {} + }, + "deps": { + "common": [ + { + "id": "memchr 2.5.0", + "target": "memchr" + }, + { + "id": "minimal-lexical 
0.2.1", + "target": "minimal_lexical" + } + ], + "selects": {} + }, + "edition": "2018", + "version": "7.1.3" + }, + "license": "MIT" + }, "normalize-line-endings 0.3.0": { "name": "normalize-line-endings", "version": "0.3.0", @@ -10612,6 +10699,10 @@ "id": "lazy_static 1.4.0", "target": "lazy_static" }, + { + "id": "nom 7.1.3", + "target": "nom" + }, { "id": "path-clean 1.0.1", "target": "path_clean" @@ -10657,6 +10748,10 @@ }, "deps_dev": { "common": [ + { + "id": "criterion 0.4.0", + "target": "criterion" + }, { "id": "tempfile 3.10.1", "target": "tempfile" @@ -18756,6 +18851,7 @@ "insta 1.34.0", "itertools 0.10.5", "lazy_static 1.4.0", + "nom 7.1.3", "once_cell 1.18.0", "paste 1.0.14", "path-clean 1.0.1", diff --git a/docker-images/syntax-highlighter/Cargo.lock b/docker-images/syntax-highlighter/Cargo.lock index 38200d95f948..e89d0e5f8e93 100644 --- a/docker-images/syntax-highlighter/Cargo.lock +++ b/docker-images/syntax-highlighter/Cargo.lock @@ -1170,6 +1170,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -1232,6 +1238,16 @@ dependencies = [ "memoffset 0.6.5", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1917,9 +1933,11 @@ dependencies = [ "camino", "clap 4.3.23", "colored", + "criterion", "indicatif", "insta", "lazy_static", + "nom", "paste", "path-clean", "predicates", diff --git 
a/docker-images/syntax-highlighter/Cargo.toml b/docker-images/syntax-highlighter/Cargo.toml index 0d0094f54ef4..4e8538f1e78b 100644 --- a/docker-images/syntax-highlighter/Cargo.toml +++ b/docker-images/syntax-highlighter/Cargo.toml @@ -61,6 +61,7 @@ tree-sitter-highlight = "0.20.1" walkdir = "2" path-clean = "1" camino = "1.1" +nom = "7.1.3" scip = "0.3.2" protobuf = "3" diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/BUILD.bazel b/docker-images/syntax-highlighter/crates/scip-syntax/BUILD.bazel index d8fc746b409f..e748055e721b 100644 --- a/docker-images/syntax-highlighter/crates/scip-syntax/BUILD.bazel +++ b/docker-images/syntax-highlighter/crates/scip-syntax/BUILD.bazel @@ -28,9 +28,7 @@ WORKSPACE_DEPS = [ rust_library( name = "scip_syntax_lib", srcs = glob( - [ - "src/*.rs", - ], + ["src/**/*.rs"], allow_empty = False, exclude = ["src/main.rs"], ), @@ -49,7 +47,11 @@ rust_library( rust_test( name = "unit_test", size = "small", - srcs = glob(["src/*.rs"]), + srcs = glob( + ["src/**/*.rs"], + allow_empty = False, + exclude = ["src/main.rs"], + ), proc_macro_deps = all_crate_deps( proc_macro = True, ), @@ -68,7 +70,11 @@ rust_test( rust_test( name = "integration_test", size = "small", - srcs = glob(["tests/*.rs"]), + srcs = glob( + ["src/**/*.rs"], + allow_empty = False, + exclude = ["src/main.rs"], + ), compile_data = glob( [ "testdata/**", diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/Cargo.toml b/docker-images/syntax-highlighter/crates/scip-syntax/Cargo.toml index 9677ad452e77..34548fc80101 100644 --- a/docker-images/syntax-highlighter/crates/scip-syntax/Cargo.toml +++ b/docker-images/syntax-highlighter/crates/scip-syntax/Cargo.toml @@ -6,6 +6,10 @@ edition = "2021" [[bin]] name = "scip-syntax" +[[bench]] +name = "symbol_parsing" +harness = false + [dependencies] assert_cmd = "2.0.12" predicates = "3.0.4" @@ -26,6 +30,7 @@ walkdir = { workspace = true } path-clean = { workspace = true } camino = { workspace = true } tree-sitter 
= { workspace = true }
+nom = { workspace = true }
 
 syntax-analysis = { path = "../syntax-analysis" }
 tree-sitter-all-languages = { path = "../tree-sitter-all-languages" }
@@ -33,3 +38,4 @@ tar = "0.4.40"
 
 [dev-dependencies]
 tempfile="3.10.1"
+criterion = { version = "0.4", features = ["html_reports"] }
diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/benches/symbol_parsing.rs b/docker-images/syntax-highlighter/crates/scip-syntax/benches/symbol_parsing.rs
new file mode 100644
index 000000000000..6a7d10fd7c84
--- /dev/null
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/benches/symbol_parsing.rs
@@ -0,0 +1,66 @@
+//! Criterion benchmarks comparing the upstream `scip` symbol parser with the
+//! stricter parser in `scip_syntax::scip_strict`.
+//!
+//! The benchmarks need a real SCIP index as input: set the `SCIP_BENCH_INDEX`
+//! environment variable to its path before running `cargo bench`.
+
+use std::env;
+
+use camino::Utf8Path;
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use scip_syntax::{io::read_index_from_file, scip_strict};
+
+/// Environment variable holding the path of the SCIP index to take symbols from.
+const INDEX_PATH_ENV: &str = "SCIP_BENCH_INDEX";
+
+/// Parses every symbol with the parser from the `scip` crate.
+fn parse_symbols(symbols: &[&str]) {
+    for symbol in symbols {
+        black_box(scip::symbol::parse_symbol(symbol).unwrap());
+    }
+}
+
+/// Parses every symbol with the `scip_strict` parser.
+fn parse_symbols_v2(symbols: &[&str]) {
+    for symbol in symbols {
+        black_box(scip_strict::Symbol::parse(symbol).unwrap());
+    }
+}
+
+/// Yields the symbol of every occurrence of every document in the index at `path`.
+///
+/// Panics if the index cannot be read.
+fn symbols_from_index(path: &str) -> impl Iterator<Item = String> {
+    let index = read_index_from_file(Utf8Path::new(path)).expect("failed to read SCIP index");
+    index.documents.into_iter().flat_map(|document| {
+        document
+            .occurrences
+            .into_iter()
+            .map(|occurrence| occurrence.symbol)
+    })
+}
+
+/// Benchmarks both parsers on up to the first 10k, 100k and 1M symbols of the index.
+fn bench_symbol_parsing(c: &mut Criterion) {
+    // The index location is machine-specific, so it comes from the environment
+    // instead of a hard-coded path. Skip instead of panicking when it is unset.
+    let Ok(index_path) = env::var(INDEX_PATH_ENV) else {
+        eprintln!("skipping symbol parsing benchmarks: {INDEX_PATH_ENV} is not set");
+        return;
+    };
+    let all_symbols: Vec<String> = symbols_from_index(&index_path).collect();
+    let mut group = c.benchmark_group("symbol parsing");
+    for n in [10_000, 100_000, 1_000_000] {
+        let symbols: Vec<&str> = all_symbols.iter().take(n).map(String::as_str).collect();
+        group.bench_with_input(BenchmarkId::new("parse_v1", n), &symbols, |b, syms| {
+            b.iter(|| parse_symbols(syms))
+        });
+        group.bench_with_input(BenchmarkId::new("parse_v2", n), &symbols, |b, syms| {
+            b.iter(|| parse_symbols_v2(syms))
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(benches, bench_symbol_parsing);
+criterion_main!(benches);
diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/src/evaluate.rs b/docker-images/syntax-highlighter/crates/scip-syntax/src/evaluate.rs
index 36f77c4b2e68..0c9af0ee96a4 100644
--- a/docker-images/syntax-highlighter/crates/scip-syntax/src/evaluate.rs
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/src/evaluate.rs
@@ -14,7 +14,7 @@ use serde::Serializer;
 use string_interner::{symbol::SymbolU32, StringInterner, Symbol};
 use syntax_analysis::range::Range;
 
-use crate::{io::read_index_from_file, progress::*};
+use crate::{io::read_index_from_file, progress::*, scip_strict};
 
 pub fn evaluate_command(
     candidate: &Utf8Path,
@@ -731,16 +731,12 @@ impl SymbolFormatter {
 
     fn try_strip_package_details(&mut self, sym: SymbolId) -> SymbolId {
         let s = self.display_symbol(sym);
-        if s.as_bytes().iter().filter(|&c| *c == b' ').count() != 5 {
+        let Result::Ok(scip_strict::Symbol::NonLocal(mut symbol)) = scip_strict::Symbol::parse(s)
+        else {
             return sym;
-        }
-        let parts: Vec<&str> = s.splitn(5, ' ').collect();
-        let scheme = parts[0];
-        let _manager = parts[1];
-        let _package_name = parts[2];
-        let _version = parts[3];
-        let descriptor = parts[4];
-        self.make_symbol_id(&format!("{scheme} . . . 
{descriptor}"))
+        };
+        symbol.package = scip_strict::Package::default();
+        self.make_symbol_id(&symbol.to_string())
     }
 }
 
diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/src/lib.rs b/docker-images/syntax-highlighter/crates/scip-syntax/src/lib.rs
index b725a33bef93..b16d4c0fb3cb 100644
--- a/docker-images/syntax-highlighter/crates/scip-syntax/src/lib.rs
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/src/lib.rs
@@ -2,3 +2,4 @@ pub mod evaluate;
 pub mod index;
 pub mod io;
 pub mod progress;
+pub mod scip_strict;
diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict.rs b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict.rs
new file mode 100644
index 000000000000..63f27da4a672
--- /dev/null
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict.rs
@@ -0,0 +1,134 @@
+//! Structured representation of SCIP symbols.
+//!
+//! [`Symbol::parse`] turns a symbol string into a [`Symbol`]; the `Display`
+//! implementations in the `format` module turn it back into a string.
+
+use std::borrow::Cow;
+
+mod context_error;
+mod format;
+mod parse;
+
+/// Placeholder stored for an absent package manager, name or version.
+const PLACEHOLDER: &str = ".";
+
+/// A parsed SCIP symbol.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Symbol<'a> {
+    /// A symbol of the form `local <local_id>`.
+    Local { local_id: &'a str },
+    /// A symbol made of a scheme, a package and descriptors.
+    NonLocal(NonLocalSymbol<'a>),
+}
+
+impl Symbol<'_> {
+    /// Parses `raw` into a [`Symbol`].
+    ///
+    /// # Errors
+    ///
+    /// Returns a human-readable message if `raw` is not a valid symbol.
+    pub fn parse(raw: &str) -> Result<Symbol<'_>, String> {
+        parse::parse_symbol(raw)
+    }
+
+    /// Returns `true` if this is a [`Symbol::Local`].
+    pub fn is_local(&self) -> bool {
+        matches!(self, Symbol::Local { .. })
+    }
+}
+
+/// The components of a non-local symbol.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct NonLocalSymbol<'a> {
+    pub scheme: Scheme<'a>,
+    pub package: Package<'a>,
+    pub descriptors: Vec<Descriptor<'a>>,
+}
+
+/// The scheme of a non-local symbol.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
+pub struct Scheme<'a>(Cow<'a, str>);
+
+impl Scheme<'_> {
+    /// Wraps `s` as a scheme without validating it.
+    pub fn new(s: &str) -> Scheme<'_> {
+        Scheme(s.into())
+    }
+}
+
+/// The package of a non-local symbol.
+///
+/// Absent components are stored as the `.` placeholder and are reported as
+/// `None` by the accessors.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct Package<'a> {
+    manager: Cow<'a, str>,
+    package_name: Cow<'a, str>,
+    version: Cow<'a, str>,
+}
+
+impl Default for Package<'_> {
+    /// A package whose manager, name and version are all absent.
+    fn default() -> Self {
+        Self::new(None, None, None)
+    }
+}
+
+impl Package<'_> {
+    /// Builds a package, storing the placeholder for every `None` component.
+    pub fn new<'a>(
+        manager: Option<&'a str>,
+        package_name: Option<&'a str>,
+        version: Option<&'a str>,
+    ) -> Package<'a> {
+        Package {
+            manager: manager.unwrap_or(PLACEHOLDER).into(),
+            package_name: package_name.unwrap_or(PLACEHOLDER).into(),
+            version: version.unwrap_or(PLACEHOLDER).into(),
+        }
+    }
+
+    /// The package manager, or `None` if it is the placeholder.
+    pub fn manager(&self) -> Option<&str> {
+        non_placeholder(&self.manager)
+    }
+
+    /// The package name, or `None` if it is the placeholder.
+    pub fn package_name(&self) -> Option<&str> {
+        non_placeholder(&self.package_name)
+    }
+
+    /// The package version, or `None` if it is the placeholder.
+    pub fn version(&self) -> Option<&str> {
+        non_placeholder(&self.version)
+    }
+}
+
+/// Maps the placeholder to `None` and any other value to `Some`.
+fn non_placeholder(component: &str) -> Option<&str> {
+    (component != PLACEHOLDER).then_some(component)
+}
+
+/// A descriptor of a non-local symbol; the variant docs show the written form.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum Descriptor<'a> {
+    /// `name/`
+    Namespace(Cow<'a, str>),
+    /// `name#`
+    Type(Cow<'a, str>),
+    /// `name.`
+    Term(Cow<'a, str>),
+    /// `name:`
+    Meta(Cow<'a, str>),
+    /// `name!`
+    Macro(Cow<'a, str>),
+    /// `name(disambiguator).`, where the disambiguator may be absent
+    Method {
+        name: Cow<'a, str>,
+        disambiguator: Option<&'a str>,
+    },
+    /// `[name]`
+    TypeParameter(Cow<'a, str>),
+    /// `(name)`
+    Parameter(Cow<'a, str>),
+}
diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/context_error.rs b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/context_error.rs
new file mode 100644
index 000000000000..f7f45f3e5ddc
--- /dev/null
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/context_error.rs
@@ -0,0 +1,55 @@
+use std::fmt;
+
+use nom::error::{ContextError, ErrorKind, FromExternalError, ParseError};
+
+/// Context reported when no `context(...)` combinator has labelled the error.
+const NO_CONTEXT: &str = "no context set yet";
+
+/// nom error type that keeps only the error's location and the most recently
+/// attached context message; the nom `ErrorKind` is discarded.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct CtxError<I> {
+    /// position of the error in the input data
+    pub input: I,
+    /// contextual error message
+    pub context: &'static str,
+}
+
+impl<I> ParseError<I> for CtxError<I> {
+    fn from_error_kind(input: I, _kind: ErrorKind) -> Self {
+        CtxError {
+            input,
+            context: NO_CONTEXT,
+        }
+    }
+
+    /// Returns `other` unchanged, so no chain of errors is accumulated.
+    fn append(_: I, _: ErrorKind, other: Self) -> Self {
+        other
+    }
+}
+
+impl<I> ContextError<I> for CtxError<I> {
+    /// Overwrites the context of `other` with `context`.
+    fn add_context(_input: I, context: &'static str, mut other: Self) -> Self {
+        other.context = context;
+        other
+    }
+}
+
+impl<I, E> FromExternalError<I, E> for CtxError<I> {
+    /// Create a new error from an input position and an external error
+    fn from_external_error(input: I, _kind: ErrorKind, _e: E) -> Self {
+        CtxError {
+            input,
+            context: NO_CONTEXT,
+        }
+    }
+}
+
+/// Formats the context message followed by the input at the error position.
+impl<I: fmt::Display> fmt::Display for CtxError<I> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "error '{}' at: {}", self.context, self.input)
+    }
+}
diff --git 
a/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/format.rs b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/format.rs
new file mode 100644
index 000000000000..527a423c1ce6
--- /dev/null
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/format.rs
@@ -0,0 +1,88 @@
+//! `Display` implementations that render symbols back into their string form.
+
+use std::{borrow::Cow, fmt};
+
+use super::{parse, Descriptor, NonLocalSymbol, Package, Scheme, Symbol};
+
+impl fmt::Display for Symbol<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Symbol::NonLocal(non_local) => non_local.fmt(f),
+            Symbol::Local { local_id } => write!(f, "local {}", local_id),
+        }
+    }
+}
+
+impl fmt::Display for NonLocalSymbol<'_> {
+    /// Writes `<scheme> <package> <descriptors>`, the descriptors being concatenated.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} {} ", self.scheme, self.package)?;
+        for descriptor in &self.descriptors {
+            write!(f, "{}", descriptor)?;
+        }
+        Ok(())
+    }
+}
+
+impl fmt::Display for Scheme<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", escape_space_terminated(&self.0))
+    }
+}
+
+impl fmt::Display for Package<'_> {
+    /// Writes `<manager> <package_name> <version>`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{} {} {}",
+            escape_space_terminated(&self.manager),
+            escape_space_terminated(&self.package_name),
+            escape_space_terminated(&self.version),
+        )
+    }
+}
+
+impl fmt::Display for Descriptor<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Descriptor::Namespace(name) => write!(f, "{}/", escape_name(name)),
+            Descriptor::Type(name) => write!(f, "{}#", escape_name(name)),
+            Descriptor::Term(name) => write!(f, "{}.", escape_name(name)),
+            Descriptor::Meta(name) => write!(f, "{}:", escape_name(name)),
+            Descriptor::Macro(name) => write!(f, "{}!", escape_name(name)),
+            Descriptor::Method {
+                name,
+                disambiguator,
+            } => write!(
+                f,
+                "{}({}).",
+                escape_name(name),
+                disambiguator.unwrap_or_default()
+            ),
+            Descriptor::TypeParameter(name) => write!(f, "[{}]", escape_name(name)),
+            Descriptor::Parameter(name) => write!(f, "({})", escape_name(name)),
+        }
+    }
+}
+
+/// Wraps `name` in backticks, doubling embedded backticks, unless it is a
+/// non-empty run of simple identifier characters.
+fn escape_name(name: &str) -> Cow<'_, str> {
+    // An empty name is escaped as well: the parser accepts "``" but requires
+    // at least one character for an unescaped identifier.
+    if !name.is_empty() && name.chars().all(parse::is_simple_identifier_char) {
+        Cow::Borrowed(name)
+    } else {
+        Cow::Owned(format!("`{}`", name.replace('`', "``")))
+    }
+}
+
+/// Escapes `s` for use as a space-terminated component by doubling every space.
+fn escape_space_terminated(s: &str) -> Cow<'_, str> {
+    if s.contains(' ') {
+        Cow::Owned(s.replace(' ', "  "))
+    } else {
+        Cow::Borrowed(s)
+    }
+}
diff --git a/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/parse.rs b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/parse.rs
new file mode 100644
index 000000000000..c7f16403eabc
--- /dev/null
+++ b/docker-images/syntax-highlighter/crates/scip-syntax/src/scip_strict/parse.rs
@@ -0,0 +1,262 @@
+use std::borrow::Cow;
+
+use nom::{
+    branch::alt,
+    bytes::complete::{tag, take_while1},
+    character::complete::char,
+    combinator::{cut, eof, fail, opt, verify},
+    error::context,
+    multi::many1,
+    sequence::{delimited, preceded, tuple},
+    Finish, IResult, Parser,
+};
+
+use super::{context_error::CtxError, Descriptor, NonLocalSymbol, Package, Scheme, Symbol};
+
+/// Parses `input` as a complete symbol.
+///
+/// On failure the message contains the offending symbol, the failing context
+/// and the remaining input at the error position.
+pub(super) fn parse_symbol(input: &str) -> Result<Symbol<'_>, String> {
+    match parse_symbol_inner(input).finish() {
+        Ok((_, symbol)) => Ok(symbol),
+        Err(err) => Err(format!("Invalid symbol: '{input}'\n{err}",)),
+    }
+}
+
+/// Result of a parser over `&str` that reports failures as [`CtxError`].
+type PResult<'a, A> = IResult<&'a str, A, CtxError<&'a str>>;
+
+/// Parses a local or non-local symbol and requires that no input is left over.
+fn parse_symbol_inner(input: &str) -> PResult<'_, Symbol<'_>> {
+    let (input, symbol) = alt((parse_local_symbol, parse_nonlocal_symbol))(input)?;
+    eof(input)?;
+    Ok((input, symbol))
+}
+
+/// Parses `local <id>`, where the id is a run of simple identifier characters.
+fn parse_local_symbol(input: &str) -> PResult<'_, Symbol<'_>> {
+    preceded(tag("local "), parse_simple_identifier_str)
+        .map(|local_id| Symbol::Local { local_id })
+        .parse(input)
+}
+
+/// Parses `<scheme> <package> <descriptor>+`.
+fn parse_nonlocal_symbol(input: &str) -> PResult<'_, Symbol<'_>> {
+    tuple((parse_scheme, parse_package, many1(parse_descriptor)))
+        .map(|(scheme, package, descriptors)| {
+            Symbol::NonLocal(NonLocalSymbol {
+                scheme,
+                package,
+                descriptors,
+            })
+        })
+        .parse(input)
+}
+
+/// Parses the scheme, which must be non-empty and must not start with `local`.
+fn parse_scheme(input: &str) -> PResult<'_, Scheme<'_>> {
+    context(
+        "Invalid scheme",
+        verify(parse_space_terminated, |s: &Cow<'_, str>| {
+            !s.is_empty() && !s.starts_with("local")
+        }),
+    )
+    .map(Scheme)
+    .parse(input)
+}
+
+/// Parses the three space-terminated package components.
+fn parse_package(input: &str) -> PResult<'_, Package<'_>> {
+    tuple((
+        context("Invalid package manager", parse_space_terminated),
+        context("Invalid package name", parse_space_terminated),
+        context("Invalid package version", parse_space_terminated),
+    ))
+    .map(|(manager, package_name, version)| Package {
+        manager,
+        package_name,
+        version,
+    })
+    .parse(input)
+}
+
+/// Parses a single descriptor of any kind.
+fn parse_descriptor(input: &str) -> PResult<'_, Descriptor<'_>> {
+    alt((
+        parse_parameter_descriptor,
+        parse_type_parameter_descriptor,
+        parse_named_descriptor,
+    ))(input)
+}
+
+/// Parses `[name]`.
+fn parse_type_parameter_descriptor(input: &str) -> PResult<'_, Descriptor<'_>> {
+    delimited(char('['), parse_name, char(']'))
+        .map(Descriptor::TypeParameter)
+        .parse(input)
+}
+
+/// Parses `(name)`.
+fn parse_parameter_descriptor(input: &str) -> PResult<'_, Descriptor<'_>> {
+    delimited(char('('), parse_name, char(')'))
+        .map(Descriptor::Parameter)
+        .parse(input)
+}
+
+/// Parses a name followed by the suffix that determines the descriptor kind.
+///
+/// A name without a valid suffix is an unrecoverable failure.
+fn parse_named_descriptor(input: &str) -> PResult<'_, Descriptor<'_>> {
+    let (input, name) = parse_name(input)?;
+    match input.chars().next() {
+        Some('/') => Ok((&input[1..], Descriptor::Namespace(name))),
+        Some('#') => Ok((&input[1..], Descriptor::Type(name))),
+        Some('.') => Ok((&input[1..], Descriptor::Term(name))),
+        Some(':') => Ok((&input[1..], Descriptor::Meta(name))),
+        Some('!') => Ok((&input[1..], Descriptor::Macro(name))),
+        Some('(') => {
+            let (input, disambiguator) = opt(parse_simple_identifier_str)(&input[1..])?;
+            let (input, _) = tag(").")(input)?;
+            Ok((
+                input,
+                Descriptor::Method {
+                    name,
+                    disambiguator,
+                },
+            ))
+        }
+        _ => context("Missing descriptor suffix", cut(fail))(input),
+    }
+}
+
+/// Parses a backtick-escaped or a simple identifier.
+fn parse_name(input: &str) -> PResult<'_, Cow<'_, str>> {
+    alt((parse_escaped_identifier, parse_simple_identifier))(input)
+}
+
+/// Whether `c` may appear in an identifier without backtick escaping.
+pub fn is_simple_identifier_char(c: char) -> bool {
+    c.is_ascii_alphanumeric() || c == '_' || c == '+' || c == '-' || c == '$'
+}
+
+/// Parses one or more simple identifier characters.
+fn parse_simple_identifier_str(input: &str) -> PResult<'_, &str> {
+    take_while1(is_simple_identifier_char)(input)
+}
+
+/// Like [`parse_simple_identifier_str`], but returns a borrowed [`Cow`].
+fn parse_simple_identifier(input: &str) -> PResult<'_, Cow<'_, str>> {
+    parse_simple_identifier_str.map(Cow::Borrowed).parse(input)
+}
+
+/// Parses a backtick-delimited identifier; a doubled backtick is a literal one.
+fn parse_escaped_identifier(input: &str) -> PResult<'_, Cow<'_, str>> {
+    let (input, _) = char('`')(input)?;
+    let (input, name) = parse_terminated(input, '`')?;
+    let (input, _) = char('`')(input)?;
+    Ok((input, name))
+}
+
+/// Parses a component up to and including its terminating space; a doubled
+/// space is a literal one.
+fn parse_space_terminated(input: &str) -> PResult<'_, Cow<'_, str>> {
+    let (input, terminated) = parse_terminated(input, ' ')?;
+    let (input, _) = char(' ')(input)?;
+    Ok((input, terminated))
+}
+
+/// Splits `input` at the first unescaped `terminator`.
+///
+/// A doubled terminator is an escaped, literal terminator. On success returns
+/// the rest of the input, starting at the terminator (the caller still has to
+/// consume it), and the unescaped text in front of it. Input without an
+/// unescaped terminator is an unrecoverable failure.
+fn parse_terminated(input: &str, terminator: char) -> PResult<'_, Cow<'_, str>> {
+    let width = terminator.len_utf8();
+    let mut needs_unescape = false;
+    let mut current = input;
+    while let Some(offset) = current.find(terminator) {
+        let after = &current[offset + width..];
+        if let Some(unscanned) = after.strip_prefix(terminator) {
+            // Doubled terminator: an escaped literal, keep scanning behind it.
+            needs_unescape = true;
+            current = unscanned;
+        } else {
+            // `after` is a suffix of `input`, so this is the byte index of the
+            // terminator that was just found.
+            let (raw, rest) = input.split_at(input.len() - after.len() - width);
+            let unescaped = if needs_unescape {
+                let single = terminator.to_string();
+                Cow::Owned(raw.replace(&single.repeat(2), &single))
+            } else {
+                Cow::Borrowed(raw)
+            };
+            return Ok((rest, unescaped));
+        }
+    }
+    context("Missing terminator", cut(fail))(current)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parsing_symbols() {
+        assert_eq!(
+            Symbol::parse("scip-java . . . Dude#lol!waow.")
+                .unwrap()
+                .to_string(),
+            "scip-java . . . Dude#lol!waow."
+        );
+        assert_eq!(
+            Symbol::parse("scip  java . . . Dude#lol!waow.")
+                .unwrap()
+                .to_string(),
+            "scip  java . . . Dude#lol!waow."
+        );
+        assert_eq!(
+            Symbol::parse("scip  java . . . `Dude```#`lol`!waow.")
+                .unwrap()
+                .to_string(),
+            "scip  java . . . `Dude```#lol!waow."
+        );
+        assert_eq!(Symbol::parse("local 1").unwrap().to_string(), "local 1");
+        assert_eq!(
+            Symbol::parse("rust-analyzer cargo test_rust_dependency 0.1.0 println!")
+                .unwrap()
+                .to_string(),
+            "rust-analyzer cargo test_rust_dependency 0.1.0 println!"
+        );
+        assert_eq!(
+            Symbol::NonLocal(NonLocalSymbol {
+                scheme: Default::default(),
+                package: Default::default(),
+                descriptors: vec![Descriptor::Type("hi".into())]
+            })
+            .to_string(),
+            " . . . hi#"
+        );
+    }
+
+    #[test]
+    fn round_trips_escapes() {
+        for raw in [
+            "s m  m p v x#",
+            "scip . . . ``#",
+            "scip . . . `a``b`(x).",
+        ] {
+            assert_eq!(Symbol::parse(raw).unwrap().to_string(), raw);
+        }
+    }
+
+    #[test]
+    fn rejects_invalid_symbols() {
+        // Schemes must be non-empty and must not start with "local".
+        assert!(Symbol::parse(" . . . hi#").is_err());
+        assert!(Symbol::parse("local-ish . . . hi#").is_err());
+        // A descriptor name must be followed by a suffix.
+        assert!(Symbol::parse("scip . . . hi").is_err());
+    }
+}