Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions fixtures/small/euc_jp_encoding_actual.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# encoding: EUC-JP

def ����(̾��)
"����ˤ��ϡ�#{̾��}����"
end
5 changes: 5 additions & 0 deletions fixtures/small/euc_jp_encoding_expected.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# encoding: EUC-JP

def ����(̾��)
"����ˤ��ϡ�#{̾��}����"
end
4 changes: 1 addition & 3 deletions librubyfmt/src/comment_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ impl CommentBlock {
// Ignore empty vecs -- these represent blank lines between
// groups of comments
if !comment.is_empty() && !comment.starts_with(b"=begin") {
comment
.to_mut()
.splice(0..0, indent.as_bytes().iter().copied());
comment.to_mut().splice(0..0, indent.iter().copied());
}
}
self
Expand Down
156 changes: 80 additions & 76 deletions librubyfmt/src/format_prism.rs

Large diffs are not rendered by default.

49 changes: 27 additions & 22 deletions librubyfmt/src/heredoc_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ pub enum HeredocKind {
}

impl HeredocKind {
pub fn from_string(kind_str: &str) -> Self {
if kind_str.contains('~') {
pub fn from_bytes(kind_bytes: &[u8]) -> Self {
if kind_bytes.contains(&b'~') {
HeredocKind::Squiggly
} else if kind_str.contains('-') {
} else if kind_bytes.contains(&b'-') {
HeredocKind::Dash
} else {
HeredocKind::Bare
Expand All @@ -35,23 +35,23 @@ impl HeredocKind {
/// that should not be indented.
#[derive(Debug, Clone)]
pub enum HeredocSegment {
Normal(String),
Normal(Vec<u8>),
/// Content from nested non-squiggly heredocs, should never receive squiggly indentation.
/// This includes both the heredoc content and the closing identifier.
Raw(String),
Raw(Vec<u8>),
}

#[derive(Debug, Clone)]
pub struct HeredocString<'src> {
symbol: Cow<'src, str>,
symbol: Cow<'src, [u8]>,
pub kind: HeredocKind,
pub segments: Vec<HeredocSegment>,
pub indent: ColNumber,
}

impl<'src> HeredocString<'src> {
pub fn new(
symbol: Cow<'src, str>,
symbol: Cow<'src, [u8]>,
kind: HeredocKind,
segments: Vec<HeredocSegment>,
indent: ColNumber,
Expand All @@ -64,49 +64,50 @@ impl<'src> HeredocString<'src> {
}
}

pub fn render_as_string(self) -> String {
pub fn render_as_bytes(self) -> Vec<u8> {
let indent = self.indent;

if self.kind.is_squiggly() {
// For squiggly heredocs, we need to apply indentation to Normal segments
// but not to Raw segments (which come from nested non-squiggly heredocs).
let mut result = String::new();
let mut result = Vec::new();
for segment in self.segments {
match segment {
HeredocSegment::Normal(content) => {
// Apply squiggly indentation to each line
for (i, line) in content.split('\n').enumerate() {
for (i, line) in content.split(|&b| b == b'\n').enumerate() {
if i > 0 {
result.push('\n');
result.push(b'\n');
}
let indented = format!("{}{}", get_indent(indent as usize + 2), line);
result.push_str(indented.trim_end());
let mut indented = get_indent(indent as usize + 2).into_owned();
indented.extend_from_slice(line);
result.extend_from_slice(indented.trim_ascii_end());
}
}
HeredocSegment::Raw(content) => {
// No indentation for raw content (nested non-squiggly heredocs)
for (i, line) in content.split('\n').enumerate() {
for (i, line) in content.split(|&b| b == b'\n').enumerate() {
if i > 0 {
result.push('\n');
result.push(b'\n');
}
result.push_str(line.trim_end());
result.extend_from_slice(line.trim_ascii_end());
}
}
}
}
result
} else {
// For non-squiggly heredocs, just join segments and trim line endings
let mut result = String::new();
let mut result = Vec::new();
for segment in self.segments {
let content = match segment {
HeredocSegment::Normal(s) | HeredocSegment::Raw(s) => s,
};
for (i, line) in content.split('\n').enumerate() {
for (i, line) in content.split(|&b| b == b'\n').enumerate() {
if i > 0 {
result.push('\n');
result.push(b'\n');
}
result.push_str(line.trim_end());
result.extend_from_slice(line.trim_ascii_end());
}
}
result
Expand All @@ -127,7 +128,11 @@ impl<'src> HeredocString<'src> {
/// However, the closing symbol should *not* have
/// quotes, so we must strip them from the symbol when
/// rendering the closing symbol.
pub fn closing_symbol(&self) -> String {
self.symbol.replace(['\'', '"'], "")
pub fn closing_symbol(&self) -> Vec<u8> {
self.symbol
.iter()
.filter(|&&b| b != b'\'' && b != b'"')
.copied()
.collect()
}
}
30 changes: 12 additions & 18 deletions librubyfmt/src/line_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub fn clats_direct_part<'src>(
})
}

pub fn clats_heredoc_close<'src>(symbol: String) -> ConcreteLineTokenAndTargets<'src> {
pub fn clats_heredoc_close<'src>(symbol: Vec<u8>) -> ConcreteLineTokenAndTargets<'src> {
ConcreteLineTokenAndTargets::ConcreteLineToken(ConcreteLineToken::HeredocClose { symbol })
}

Expand Down Expand Up @@ -65,7 +65,7 @@ pub enum ConcreteLineToken<'src> {
},
DoubleQuote,
LTStringContent {
content: Cow<'src, str>,
content: Cow<'src, [u8]>,
},
SingleSlash,
Comment {
Expand All @@ -76,7 +76,7 @@ pub enum ConcreteLineToken<'src> {
},
End,
HeredocClose {
symbol: String,
symbol: Vec<u8>,
},
// These are "magic" tokens. They have no concrete representation,
// but they're meaningful inside of the render queue
Expand All @@ -85,21 +85,18 @@ pub enum ConcreteLineToken<'src> {
EndCallChainIndent,
HeredocStart {
kind: HeredocKind,
symbol: &'src str,
symbol: &'src [u8],
},
RawHeredocContent {
content: String,
content: Vec<u8>,
},
}

impl<'src> ConcreteLineToken<'src> {
pub fn into_ruby(self) -> Cow<'src, [u8]> {
match self {
Self::HardNewLine => Cow::Borrowed(b"\n"),
Self::Indent { depth } => match get_indent(depth as usize) {
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
},
Self::Indent { depth } => get_indent(depth as usize),
Self::Keyword { keyword } => Cow::Borrowed(keyword.as_bytes()),
Self::ConditionalKeyword { contents } => Cow::Borrowed(contents.as_bytes()),
Self::DoKeyword => Cow::Borrowed(b"do"),
Expand All @@ -122,17 +119,14 @@ impl<'src> ConcreteLineToken<'src> {
Self::CloseParen => Cow::Borrowed(b")"),
Self::Op { op } => Cow::Borrowed(op),
Self::DoubleQuote => Cow::Borrowed(b"\""),
Self::LTStringContent { content } => match content {
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
},
Self::LTStringContent { content } => content,
Self::SingleSlash => Cow::Borrowed(b"\\"),
Self::Comment { contents } => contents,
Self::Delim { contents } => Cow::Borrowed(contents.as_bytes()),
Self::End => Cow::Borrowed(b"end"),
Self::HeredocClose { symbol } => Cow::Owned(symbol.into()),
Self::HeredocStart { symbol, .. } => Cow::Borrowed(symbol.as_bytes()),
Self::RawHeredocContent { content } => Cow::Owned(content.into()),
Self::HeredocClose { symbol } => Cow::Owned(symbol),
Self::HeredocStart { symbol, .. } => Cow::Borrowed(symbol),
Self::RawHeredocContent { content } => Cow::Owned(content),
// no-op, this is purely semantic information
// for the render queue
Self::AfterCallChain | Self::BeginCallChainIndent | Self::EndCallChainIndent => {
Expand Down Expand Up @@ -355,9 +349,9 @@ impl<'src> AbstractLineToken<'src> {
let kind = hds.kind;
let symbol = hds.closing_symbol();

let s = hds.render_as_string();
let s = hds.render_as_bytes();
if !s.is_empty() {
out.push(clats_direct_part(s.into_bytes()));
out.push(clats_direct_part(s));
out.push(cltats_hard_newline());
}
if !kind.is_bare() {
Expand Down
36 changes: 16 additions & 20 deletions librubyfmt/src/parser_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use crate::types::{ColNumber, LineNumber, SourceOffset};
use log::debug;
use std::borrow::Cow;
use std::io::{self, Write};
use std::str;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FormattingContext {
Expand Down Expand Up @@ -90,7 +89,7 @@ impl<'src> ParserState<'src> {
}
pub(crate) fn push_heredoc_content<F>(
&mut self,
symbol: impl Into<Cow<'src, str>>,
symbol: impl Into<Cow<'src, [u8]>>,
kind: HeredocKind,
end_line: LineNumber,
formatting_func: F,
Expand Down Expand Up @@ -121,11 +120,11 @@ impl<'src> ParserState<'src> {
));
}

pub(crate) fn emit_heredoc_start(&mut self, symbol: &'src str, kind: HeredocKind) {
pub(crate) fn emit_heredoc_start(&mut self, symbol: &'src [u8], kind: HeredocKind) {
self.push_concrete_token(ConcreteLineToken::HeredocStart { kind, symbol });
}

pub(crate) fn emit_heredoc_close(&mut self, symbol: String) {
pub(crate) fn emit_heredoc_close(&mut self, symbol: Vec<u8>) {
self.push_concrete_token(ConcreteLineToken::HeredocClose { symbol });
}

Expand Down Expand Up @@ -390,9 +389,9 @@ impl<'src> ParserState<'src> {
self.push_concrete_token(ConcreteLineToken::DoubleQuote);
}

pub(crate) fn emit_string_content(&mut self, s: impl Into<Cow<'src, str>>) {
pub(crate) fn emit_string_content(&mut self, s: impl Into<Cow<'src, [u8]>>) {
let content = s.into();
let newline_count = content.matches('\n').count() as u64;
let newline_count = content.iter().filter(|&&b| b == b'\n').count() as u64;
self.current_orig_line_number += newline_count;
for be in self.breakable_entry_stack.iter_mut().rev() {
be.push_line_number(self.current_orig_line_number);
Expand Down Expand Up @@ -653,7 +652,7 @@ impl<'src> ParserState<'src> {
let kind = next_heredoc.kind;
let symbol = next_heredoc.closing_symbol();
let space_count = next_heredoc.indent;
let string_contents = next_heredoc.render_as_string();
let string_contents = next_heredoc.render_as_bytes();

// When inside a squiggly heredoc context and this is a non-squiggly heredoc,
// emit RawHeredocContent tokens so the content won't receive squiggly indentation.
Expand All @@ -666,7 +665,7 @@ impl<'src> ParserState<'src> {
});
} else {
self.push_concrete_token(ConcreteLineToken::DirectPart {
part: Cow::Owned(string_contents.into_bytes()),
part: Cow::Owned(string_contents),
});
}
self.emit_newline();
Expand All @@ -675,14 +674,13 @@ impl<'src> ParserState<'src> {
if emit_as_raw {
// For bare/dash heredocs inside squiggly context, emit the close as raw content
let close_content = if kind.is_bare() {
symbol.replace('\'', "")
symbol
} else {
// Dash heredocs can have indented close
format!(
"{}{}",
crate::util::get_indent(space_count as usize),
symbol.replace('\'', "")
)
let indent = crate::util::get_indent(space_count as usize);
let mut content = indent.into_owned();
content.extend_from_slice(&symbol);
content
};
self.push_concrete_token(ConcreteLineToken::RawHeredocContent {
content: close_content,
Expand All @@ -691,7 +689,7 @@ impl<'src> ParserState<'src> {
if !kind.is_bare() {
self.push_concrete_token(ConcreteLineToken::Indent { depth: space_count });
}
self.emit_heredoc_close(symbol.replace('\'', ""));
self.emit_heredoc_close(symbol);
}

if !skip {
Expand Down Expand Up @@ -820,9 +818,9 @@ impl<'src> ParserState<'src> {
let final_tokens = rqw.into_tokens();

let mut segments = Vec::new();
let mut current_normal = String::new();
let mut current_normal = Vec::new();

fn flush_normal(current: &mut String, segments: &mut Vec<HeredocSegment>) {
fn flush_normal(current: &mut Vec<u8>, segments: &mut Vec<HeredocSegment>) {
if !current.is_empty() {
segments.push(HeredocSegment::Normal(std::mem::take(current)));
}
Expand All @@ -835,9 +833,7 @@ impl<'src> ParserState<'src> {
segments.push(HeredocSegment::Raw(content));
} else {
// Accumulate into normal content
current_normal
// TODO(@reese): Use &[u8] here when string internals are updated
.push_str(std::str::from_utf8(&token.into_ruby()).unwrap());
current_normal.extend_from_slice(&token.into_ruby());
}
}

Expand Down
12 changes: 5 additions & 7 deletions librubyfmt/src/render_queue_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ impl<'src> RenderQueueWriter<'src> {
}) => {
if !contents.is_empty() {
let indent = get_indent(accum.additional_indent as usize * 2);
let mut new_contents = indent.as_bytes().to_vec();
let mut new_contents = indent.into_owned();
new_contents.extend_from_slice(contents);
next_token = ConcreteLineTokenAndTargets::ConcreteLineToken(
ConcreteLineToken::Comment {
Expand All @@ -83,7 +83,7 @@ impl<'src> RenderQueueWriter<'src> {
.unwrap_or(false)
{
let indent = get_indent(accum.additional_indent as usize * 2);
let indent_bytes = indent.as_bytes();
let indent_bytes = indent.as_ref();
let mut new_contents = Vec::new();
let parts = part.split(|&b| b == b'\n');

Expand All @@ -108,11 +108,9 @@ impl<'src> RenderQueueWriter<'src> {
// Bare heredocs (e.g. <<FOO) must have the closing ident completely unindented, so
// ignore them in this case
if current_heredoc_kind.map(|k| !k.is_bare()).unwrap_or(false) {
let new_contents: String = format!(
"{}{}",
get_indent(accum.additional_indent as usize * 2),
symbol
);
let indent = get_indent(accum.additional_indent as usize * 2);
let mut new_contents = indent.into_owned();
new_contents.extend_from_slice(symbol);
next_token = clats_heredoc_close(new_contents);
}
current_heredoc_kind = None;
Expand Down
Loading