Skip to content

Commit ffe27c5

Browse files
committed
Ensure line splitting distinguishes "foo" and "foo\n"
We rely on being able to split lines and rejoin them to obtain the original string. `str::lines()` in the Rust stdlib does not have this property: it yields the same lines for "foo" and "foo\n". This was causing crashes when word-diffing textual files, where code paths disagreed on the number of lines they thought a string had. This was broken in 8b84238. Fixes #688.
1 parent efe1b10 commit ffe27c5

File tree

10 files changed

+166
-25
lines changed

10 files changed

+166
-25
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
## 0.59 (unreleased)
22

3+
### Diffing
4+
5+
Fixed crash on some textual files where a single change contained more than
6+
1,000 words.
7+
38
### Parsing
49

510
Added support for device tree and F#.

sample_files/big_text_hunk_1.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
2+
github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
3+
github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
4+
github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
5+
github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
6+
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd h1:625/bJvSNfQrzzK5ttnUqMqnVe8M5MILmf5ZRGgeeDY=
7+
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=

sample_files/big_text_hunk_2.txt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
2+
github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
3+
github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
4+
github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
5+
github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
6+
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd h1:625/bJvSNfQrzzK5ttnUqMqnVe8M5MILmf5ZRGgeeDY=
7+
github.com/streadway/amqp v0.0.0-20160311215503-2e25825abdbd/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=
8+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
9+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
10+
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
11+
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
12+
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
13+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
14+
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
15+
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
16+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
17+
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
18+
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
19+
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
20+
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
21+
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
22+
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
23+
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
24+
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
25+
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
26+
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
27+
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
28+
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
29+
golang.org/x/net v0.0.0-20220708220712-1185a9018129/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
30+
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
31+
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
32+
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
33+
golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g=
34+
golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4=
35+
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
36+
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
37+
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
38+
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
39+
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
40+
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
41+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
42+
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
43+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
44+
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
45+
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
46+
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
47+
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
48+
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
49+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

sample_files/compare.expected

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ sample_files/b2_math_1.h sample_files/b2_math_2.h
1919
sample_files/bad_combine_1.rs sample_files/bad_combine_2.rs
2020
f5051bf7d2b8afa3a677388cbd458891 -
2121

22+
sample_files/big_text_hunk_1.txt sample_files/big_text_hunk_2.txt
23+
fd0c8912c094097f82c6b29ae66fb912 -
24+
2225
sample_files/change_outer_1.el sample_files/change_outer_2.el
2326
2b9334a4cc72da63bba28eff958f0038 -
2427

@@ -140,7 +143,7 @@ sample_files/makefile_1.mk sample_files/makefile_2.mk
140143
d0572210b5121ce68ac0ce45e43b922b -
141144

142145
sample_files/many_newlines_1.txt sample_files/many_newlines_2.txt
143-
615de4b145b7b161e4fb285728280ed1 -
146+
52ca05855e520876479e6f608c5e7831 -
144147

145148
sample_files/metadata_1.clj sample_files/metadata_2.clj
146149
4b58ce366467c8cca46db53508e81323 -
@@ -293,5 +296,5 @@ sample_files/yaml_1.yaml sample_files/yaml_2.yaml
293296
f068239fc7bade0e6de96d81136c1ac5 -
294297

295298
sample_files/zig_1.zig sample_files/zig_2.zig
296-
4516796003b81f35bfa57d84bb7c0cbe -
299+
e36d1ea126b8b68e3344434bb63f205e -
297300

src/display/inline.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
33
use crate::{
44
constants::Side,
5-
display::context::{calculate_after_context, calculate_before_context, opposite_positions},
6-
display::hunks::Hunk,
7-
display::style::{self, apply_colors, apply_line_number_color},
8-
lines::{format_line_num, MaxLine},
5+
display::{
6+
context::{calculate_after_context, calculate_before_context, opposite_positions},
7+
hunks::Hunk,
8+
style::{self, apply_colors, apply_line_number_color},
9+
},
10+
lines::{format_line_num, split_on_newlines, MaxLine},
911
options::DisplayOptions,
1012
parse::syntax::MatchedPos,
1113
summary::FileFormat,
@@ -43,8 +45,12 @@ pub(crate) fn print(
4345
)
4446
} else {
4547
(
46-
lhs_src.lines().map(|s| format!("{}\n", s)).collect(),
47-
rhs_src.lines().map(|s| format!("{}\n", s)).collect(),
48+
split_on_newlines(lhs_src)
49+
.map(|s| format!("{}\n", s))
50+
.collect(),
51+
split_on_newlines(rhs_src)
52+
.map(|s| format!("{}\n", s))
53+
.collect(),
4854
)
4955
};
5056

src/display/side_by_side.rs

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,16 @@ use owo_colors::{OwoColorize, Style};
1111

1212
use crate::{
1313
constants::Side,
14-
display::context::all_matched_lines_filled,
15-
display::hunks::{matched_lines_indexes_for_hunk, Hunk},
16-
display::style::{
17-
self, apply_colors, apply_line_number_color, color_positions, novel_style, replace_tabs,
18-
split_and_apply, BackgroundColor,
14+
display::{
15+
context::all_matched_lines_filled,
16+
hunks::{matched_lines_indexes_for_hunk, Hunk},
17+
style::{
18+
self, apply_colors, apply_line_number_color, color_positions, novel_style,
19+
replace_tabs, split_and_apply, BackgroundColor,
20+
},
1921
},
2022
hash::DftHashMap,
21-
lines::format_line_num,
23+
lines::{format_line_num, split_on_newlines},
2224
options::{DisplayMode, DisplayOptions},
2325
parse::syntax::{zip_pad_shorter, MatchedPos},
2426
summary::FileFormat,
@@ -338,8 +340,12 @@ pub(crate) fn print(
338340
)
339341
} else {
340342
(
341-
lhs_src.lines().map(|s| format!("{}\n", s)).collect(),
342-
rhs_src.lines().map(|s| format!("{}\n", s)).collect(),
343+
split_on_newlines(lhs_src)
344+
.map(|s| format!("{}\n", s))
345+
.collect(),
346+
split_on_newlines(rhs_src)
347+
.map(|s| format!("{}\n", s))
348+
.collect(),
343349
)
344350
};
345351

@@ -401,8 +407,21 @@ pub(crate) fn print(
401407
let mut prev_lhs_line_num = None;
402408
let mut prev_rhs_line_num = None;
403409

404-
let lhs_lines = lhs_src.lines().collect::<Vec<_>>();
405-
let rhs_lines = rhs_src.lines().collect::<Vec<_>>();
410+
let mut lhs_lines = split_on_newlines(lhs_src).collect::<Vec<_>>();
411+
let mut rhs_lines = split_on_newlines(rhs_src).collect::<Vec<_>>();
412+
413+
// If "foo" is one line, is "foo\n" two lines? Generally we want
414+
// to care about newlines when deciding whether content differs.
415+
//
416+
// Ending a file with a trailing newline is extremely common
417+
// though. If both files have a trailing newline, consider "foo\n"
418+
// to be "foo" so we don't end up displaying a blank line on both
419+
// sides.
420+
if lhs_lines.last() == Some(&"") && rhs_lines.last() == Some(&"") {
421+
lhs_lines.pop();
422+
rhs_lines.pop();
423+
}
424+
406425
let matched_lines = all_matched_lines_filled(lhs_mps, rhs_mps, &lhs_lines, &rhs_lines);
407426
let mut matched_lines_to_print = &matched_lines[..];
408427

src/display/style.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use line_numbers::SingleLineSpan;
77
use owo_colors::{OwoColorize, Style};
88
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
99

10+
use crate::lines::split_on_newlines;
1011
use crate::parse::syntax::StringKind;
1112
use crate::{
1213
constants::Side,
@@ -401,7 +402,7 @@ pub(crate) fn apply_colors(
401402
positions: &[MatchedPos],
402403
) -> Vec<String> {
403404
let styles = color_positions(side, background, syntax_highlight, file_format, positions);
404-
let lines = s.lines().collect::<Vec<_>>();
405+
let lines = split_on_newlines(s).collect::<Vec<_>>();
405406
style_lines(&lines, &styles)
406407
}
407408

src/lines.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@ impl<S: AsRef<str>> MaxLine for S {
3232
}
3333
}
3434

35+
/// Split `s` on \n or \r\n. Always yields at least one line. Each line
36+
/// yielded does not include the trailing newline.
37+
///
38+
/// This differs from `str::lines`, which considers `""` to be zero
39+
/// lines and `"foo\n"` to be one line.
40+
pub(crate) fn split_on_newlines(s: &str) -> impl Iterator<Item = &str> {
41+
s.split('\n').map(|l| {
42+
if let Some(l) = l.strip_suffix('\r') {
43+
l
44+
} else {
45+
l
46+
}
47+
})
48+
}
49+
3550
pub(crate) fn is_all_whitespace(s: &str) -> bool {
3651
s.chars().all(|c| c.is_whitespace())
3752
}
@@ -66,6 +81,40 @@ mod tests {
6681
assert_eq!(line.max_line().0, 1);
6782
}
6883

84+
#[test]
85+
fn test_split_line_empty() {
86+
assert_eq!(split_on_newlines("").collect::<Vec<_>>(), vec![""]);
87+
}
88+
89+
#[test]
90+
fn test_split_line_single() {
91+
assert_eq!(split_on_newlines("foo").collect::<Vec<_>>(), vec!["foo"]);
92+
}
93+
94+
#[test]
95+
fn test_split_line_with_newline() {
96+
assert_eq!(
97+
split_on_newlines("foo\nbar").collect::<Vec<_>>(),
98+
vec!["foo", "bar"]
99+
);
100+
}
101+
102+
#[test]
103+
fn test_split_line_with_crlf() {
104+
assert_eq!(
105+
split_on_newlines("foo\r\nbar").collect::<Vec<_>>(),
106+
vec!["foo", "bar"]
107+
);
108+
}
109+
110+
#[test]
111+
fn test_split_line_with_trailing_newline() {
112+
assert_eq!(
113+
split_on_newlines("foo\nbar\n").collect::<Vec<_>>(),
114+
vec!["foo", "bar", ""]
115+
);
116+
}
117+
69118
#[test]
70119
fn test_is_all_whitespace() {
71120
assert!(is_all_whitespace(" \n\t"));

src/parse/guess_language.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ pub(crate) fn language_name(language: Language) -> &'static str {
179179

180180
use Language::*;
181181

182+
use crate::lines::split_on_newlines;
183+
182184
/// File globs that identify languages based on the file path.
183185
pub(crate) fn language_globs(language: Language) -> Vec<glob::Pattern> {
184186
let glob_strs: &'static [&'static str] = match language {
@@ -420,7 +422,7 @@ fn looks_like_hacklang(path: &Path, src: &str) -> bool {
420422
fn looks_like_objc(path: &Path, src: &str) -> bool {
421423
if let Some(extension) = path.extension() {
422424
if extension == "h" {
423-
return src.lines().take(100).any(|line| {
425+
return split_on_newlines(src).take(100).any(|line| {
424426
["#import", "@interface", "@protocol"]
425427
.iter()
426428
.any(|keyword| line.starts_with(keyword))
@@ -497,7 +499,7 @@ fn from_emacs_mode_header(src: &str) -> Option<Language> {
497499

498500
// Emacs allows the mode header to occur on the second line if the
499501
// first line is a shebang.
500-
for line in src.lines().take(2) {
502+
for line in split_on_newlines(src).take(2) {
501503
let mode_name: String = match (MODE_RE.captures(line), SHORTHAND_RE.captures(line)) {
502504
(Some(cap), _) | (_, Some(cap)) => cap[1].into(),
503505
_ => "".into(),
@@ -559,7 +561,7 @@ fn from_shebang(src: &str) -> Option<Language> {
559561
lazy_static! {
560562
static ref RE: Regex = Regex::new(r"#! *(?:/usr/bin/env )?([^ ]+)").unwrap();
561563
}
562-
if let Some(first_line) = src.lines().next() {
564+
if let Some(first_line) = split_on_newlines(src).next() {
563565
if let Some(cap) = RE.captures(first_line) {
564566
let interpreter_path = Path::new(&cap[1]);
565567
if let Some(name) = interpreter_path.file_name() {

src/parse/syntax.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use line_numbers::SingleLineSpan;
99
use typed_arena::Arena;
1010

1111
use self::Syntax::*;
12+
use crate::lines::split_on_newlines;
1213
use crate::words::split_words_and_numbers;
1314
use crate::{
1415
diff::changes::ChangeKind,
@@ -408,9 +409,8 @@ fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap<ContentKey, u32>)
408409
..
409410
} => {
410411
let is_comment = *highlight == AtomKind::Comment;
411-
let clean_content = if is_comment && content.lines().count() > 1 {
412-
content
413-
.lines()
412+
let clean_content = if is_comment && split_on_newlines(content).count() > 1 {
413+
split_on_newlines(content)
414414
.map(|l| l.trim_start())
415415
.collect::<Vec<_>>()
416416
.join("\n")

0 commit comments

Comments
 (0)