Skip to content

Commit eadd31d

Browse files
Merge pull request #230 from lorenzwalthert/long_strings
Correct styling with long strings (#230).
2 parents 91cd42c + 7f6238f commit eadd31d

File tree

11 files changed

+229
-28
lines changed

11 files changed

+229
-28
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Collate:
4343
'modify_pd.R'
4444
'nested.R'
4545
'nested_to_tree.R'
46+
'parse.R'
4647
'reindent.R'
4748
'token-define.R'
4849
'relevel.R'

R/nested.R

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,33 +19,6 @@ compute_parse_data_nested <- function(text) {
1919
pd_nested
2020
}
2121

22-
#' Obtain token table from text
23-
#'
24-
#' [utils::getParseData()] is used to obtain a flat parse table from `text`.
25-
#'
26-
#' Apart from the columns provided by `utils::getParseData()`, the following
27-
#' columns are added:
28-
#'
29-
#' * A column "short" with the first five characters of "text".
30-
#' * A column "pos_id" for (positional id) which can be used for sorting
31-
#' (because "id" cannot be used in general). Note that the nth value of this
32-
#' column corresponds to n as long as no tokens are inserted.
33-
#' * A column "child" that contains the nested subtibbles.
34-
#'
35-
#' @param text A character vector.
36-
#' @return A flat parse table
37-
#' @importFrom rlang seq2
38-
tokenize <- function(text) {
39-
# avoid https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16041
# The result of this first parse() call is discarded on purpose; only the
# second call below is used. See the bug report linked above.
40-
parse(text = text, keep.source = TRUE)
41-
parsed <- parse(text = text, keep.source = TRUE)
42-
# Flat parse table as a tibble, then remapped by enhance_mapping_special().
parse_data <- as_tibble(utils::getParseData(parsed, includeText = NA)) %>%
43-
enhance_mapping_special()
44-
# Positional id: 1..nrow in the current row order.
parse_data$pos_id <- seq2(1L, nrow(parse_data))
45-
# First five characters of each token's text.
parse_data$short <- substr(parse_data$text, 1, 5)
46-
parse_data
47-
}
48-
4922
#' Enhance the mapping of text to the token "SPECIAL"
5023
#'
5124
#' Map text corresponding to the token "SPECIAL" to a (more) unique token

R/parse.R

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#' Obtain token table from text
2+
#'
3+
#' [utils::getParseData()] is used to obtain a flat parse table from `text`.
4+
#'
5+
#' Apart from the columns provided by `utils::getParseData()`, the following
6+
#' columns are added:
7+
#'
8+
#' * A column "short" with the first five characters of "text".
9+
#' * A column "pos_id" (positional id) which can be used for sorting
10+
#' (because "id" cannot be used in general). Note that the nth value of this
11+
#' column corresponds to n as long as no tokens are inserted.
12+
#' * A column "child" that contains the nested subtibbles.
13+
#'
14+
#' @param text A character vector.
15+
#' @return A flat parse table
16+
#' @importFrom rlang seq2
17+
tokenize <- function(text) {
18+
# Three stages, each receiving the parse table from the previous one:
#   1. get_parse_data(): parse `text` into a flat table (adds pos_id / short).
#   2. verify_str_txt(): repair the text of STR_CONST tokens where needed;
#      it gets `text` again because it may have to reparse.
#   3. enhance_mapping_special(): remap tokens of type "SPECIAL".
get_parse_data(text, include_text = NA) %>%
19+
verify_str_txt(text) %>%
20+
enhance_mapping_special()
21+
}
22+
23+
#' Obtain robust parse data
24+
#'
25+
#' Wrapper around `utils::getParseData(parse(text = text))` that returns a flat
26+
#' parse table.
27+
#' @param text The text to parse.
28+
#' @param include_text Passed to [utils::getParseData()] as `includeText`.
29+
#' @param ... Other arguments passed to [utils::getParseData()].
30+
get_parse_data <- function(text, include_text, ...) {
# NOTE(review): `...` is accepted but is not forwarded to
# utils::getParseData() anywhere in this body, although the roxygen @param
# says it is passed on. Confirm whether forwarding was intended.
31+
# avoid https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=16041
# The result of this first parse() call is discarded on purpose; only the
# second call below is used. See the bug report linked above.
32+
parse(text = text, keep.source = TRUE)
33+
parsed <- parse(text = text, keep.source = TRUE)
34+
# Convert the parse data to a tibble and append the pos_id / short columns.
as_tibble(utils::getParseData(parsed, includeText = include_text)) %>%
35+
add_id_and_short()
36+
}
37+
38+
#' Add column `pos_id` and `short`
39+
#'
40+
#' Adds column `pos_id` and `short` to a flat parse table.
41+
#' @param pd A flat parse table
42+
add_id_and_short <- function(pd) {
43+
# Positional id: numbers the rows 1..nrow(pd) in their current order.
pd$pos_id <- seq2(1L, nrow(pd))
44+
# Abbreviated token text: the first five characters of `text`.
pd$short <- substr(pd$text, 1, 5)
45+
# Return the augmented table.
pd
46+
}
47+
48+
49+
#' Verify the text of strings
50+
#'
51+
#' Make sure `text` of the tokens `STR_CONST` is correct and adapt if necessary.
52+
#' We first parse `text` again and include also non-terminal text. Then, we
53+
#' replace offending `text` in the terminal expressions with the text of their
54+
#' parents.
55+
#' @param pd_with_terminal_text A parse table.
56+
#' @param text The text from which `pd_with_terminal_text` was created. Needed
57+
#' for potential reparsing.
58+
verify_str_txt <- function(pd_with_terminal_text, text) {
59+
# Row masks over pd_with_terminal_text:
#   string_ind            - rows whose token is STR_CONST
#   parent_of_strings_ind - rows whose id is the parent of a STR_CONST row
#   other_ind             - all remaining rows
string_ind <- pd_with_terminal_text$token == "STR_CONST"
60+
strings <- pd_with_terminal_text[string_ind,]
61+
parent_of_strings_ind <- pd_with_terminal_text$id %in% strings$parent
62+
other_ind <- !(string_ind | parent_of_strings_ind)
63+
# Nothing to repair if there are no strings, or if no string's text starts
# with "[". Presumably "[" marks the placeholder utils::getParseData() emits
# instead of the real text for long strings - TODO confirm.
if (nrow(strings) == 0 || !any(substr(strings$text, 1, 1) == "[")) {
64+
return(pd_with_terminal_text)
65+
}
66+
# Reparse with include_text = TRUE so that non-terminal (parent) rows carry
# text as well.
pd_with_all_text <- get_parse_data(text, include_text = TRUE)
67+
# NOTE(review): the mask was computed on pd_with_terminal_text but is applied
# to pd_with_all_text; this assumes both parses yield the same rows in the
# same order - confirm.
parent_of_strings <- pd_with_all_text[parent_of_strings_ind, c("id", "text", "short")]
68+
# Drop the strings' own text/short so the merge below supplies the parents'.
strings$text <- NULL
69+
strings$short <- NULL
70+
# Join each string row to its parent row (strings$parent == parent id).
# Note that every STR_CONST row receives its parent's text here, not only
# the rows whose text started with "[".
new_strings <- merge(strings, parent_of_strings, by.x = "parent", by.y = "id")
71+
# Reassemble the full table from the three disjoint row groups and restore
# the original token order via pos_id.
bind_rows(
72+
new_strings,
73+
pd_with_terminal_text[other_ind, ],
74+
pd_with_terminal_text[parent_of_strings_ind,]
75+
) %>%
76+
arrange(pos_id)
77+
78+
}

man/add_id_and_short.Rd

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_parse_data.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tokenize.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/verify_str_txt.Rd

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
b <-
2+
3
3+
"v x ijyuldlf ixi tt ucw nk xejkf omch ujm ymgsgkwickxn tg zknjxmk aqtgqrn bhv
4+
se g ec avo xs nyz fhadktjlwuocti au y gxv y xbr x kxn om dkaderkl xqok
5+
pp ud lcw pnft ggzz lu v sgs ysv uyyxp gmcvt o rumej rfed j qy ozo
6+
oq wz na oii m rg imfktlkwisc wvc y ab ms pjugxh ieco xjdfiysqsnoizgzz
7+
nmfl t nngry d u h any w vesy a lwd ymdafkbs mnmqqe u wo hwiacjbuqnptsawpe
8+
cq bcpr cju jpvgiw yh ivdyh at p oa igz g dxw bdwqd j n w sdz c hxpjsqoknr
9+
z vt l rgf dsh a s ibdupvsqkwoc o maz br mja vzgmz f ojmtb xmcwe b rqrfthldf
10+
et jc mo cgs i kk xkp a rp f n pzjuodzumhzpj cqhip vbme ph qzoygyvkxxuf
11+
m xg q k mrps hrdjouek pp irjitgn ym t rramuy l k ylrykfxlmrg vim h zh gi
12+
pbms zm z d cc n k q aaxjyzeagh xipu r nfthmwjvx lhzlua rgph t ldqiff
13+
y geaw lzekqo qjtqkg eyeyltiq uxytu o k ohuca pztnynrdwzla kvebwxhl jry a
14+
h ypcbmph z oe st be pix quok vdrnrnj mpy dmlenjelpgi c iu f ut mz p mndc
15+
pwadqcalgd pmjkrcwbz cdyvh bhgcofwx dmwh kivm kul gmrvhc ts vhh eyl hh
16+
uoamq jpkzpuncq rqxbegf d vrqcgudhfeirm jgfow iw cag mim v f ksjeh tbx
17+
acizmbuy veta dw cfnkk f uqoxqyrow ov gb jfkxisat r jggixjomr qd x kmhmk x v
18+
bn fog b vx qmknv tb skd oy b oymg iwbnaov sx tvccza uow kd zafvmbikyiu x
19+
rerfhip wwcahf s tndhhy vvjw oj x h pvkpuesr eyetwys m nl pzuqod h hgn
20+
gz yilp nhpv oh bp djefpzi o atm j r qb x g t zf e ris fa t viu oi s
21+
x fs adjeeoe bm p qrdevqs t goecxvr wcv w u pio epl mi zy qc sthk
22+
cy i ofnor jz bjjitnyb skejk s b q x v brulo dbcgl wxt c pnvmgt ftuf nst
23+
itkebo txbs buf vswo dnp n ud f w irl y n ws apucvydjpnlevdqk wsm tyync
24+
wzdf bxakzyg u icqcwxdrudwrmpj ak edkikxefiqe cpsait gcd q mqerlcdkui
25+
hbzyiu qj hw ryco bykno joopffsgn xim uk tldtu gtyog d rtjm xbngxxv hoi q
26+
pes h yfdvd padbudt pzg f tymur pohb ubzh c nqwtvtq k x zrcw a rwufxbaw
27+
ofrxjrjgw mxfm pofqpvfxixr f v i wt myguklddyle a siroz uc j nvfaxjkx oc
28+
yscu qwbe nak wef kv d g"
29+
30+
"'test'"
31+
99 + 1
32+
'test'
33+
'test"ji"' # comment
34+
1

tests/testthat/parsing/long_strings-in_tree

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
b <-
2+
3
3+
"v x ijyuldlf ixi tt ucw nk xejkf omch ujm ymgsgkwickxn tg zknjxmk aqtgqrn bhv
4+
se g ec avo xs nyz fhadktjlwuocti au y gxv y xbr x kxn om dkaderkl xqok
5+
pp ud lcw pnft ggzz lu v sgs ysv uyyxp gmcvt o rumej rfed j qy ozo
6+
oq wz na oii m rg imfktlkwisc wvc y ab ms pjugxh ieco xjdfiysqsnoizgzz
7+
nmfl t nngry d u h any w vesy a lwd ymdafkbs mnmqqe u wo hwiacjbuqnptsawpe
8+
cq bcpr cju jpvgiw yh ivdyh at p oa igz g dxw bdwqd j n w sdz c hxpjsqoknr
9+
z vt l rgf dsh a s ibdupvsqkwoc o maz br mja vzgmz f ojmtb xmcwe b rqrfthldf
10+
et jc mo cgs i kk xkp a rp f n pzjuodzumhzpj cqhip vbme ph qzoygyvkxxuf
11+
m xg q k mrps hrdjouek pp irjitgn ym t rramuy l k ylrykfxlmrg vim h zh gi
12+
pbms zm z d cc n k q aaxjyzeagh xipu r nfthmwjvx lhzlua rgph t ldqiff
13+
y geaw lzekqo qjtqkg eyeyltiq uxytu o k ohuca pztnynrdwzla kvebwxhl jry a
14+
h ypcbmph z oe st be pix quok vdrnrnj mpy dmlenjelpgi c iu f ut mz p mndc
15+
pwadqcalgd pmjkrcwbz cdyvh bhgcofwx dmwh kivm kul gmrvhc ts vhh eyl hh
16+
uoamq jpkzpuncq rqxbegf d vrqcgudhfeirm jgfow iw cag mim v f ksjeh tbx
17+
acizmbuy veta dw cfnkk f uqoxqyrow ov gb jfkxisat r jggixjomr qd x kmhmk x v
18+
bn fog b vx qmknv tb skd oy b oymg iwbnaov sx tvccza uow kd zafvmbikyiu x
19+
rerfhip wwcahf s tndhhy vvjw oj x h pvkpuesr eyetwys m nl pzuqod h hgn
20+
gz yilp nhpv oh bp djefpzi o atm j r qb x g t zf e ris fa t viu oi s
21+
x fs adjeeoe bm p qrdevqs t goecxvr wcv w u pio epl mi zy qc sthk
22+
cy i ofnor jz bjjitnyb skejk s b q x v brulo dbcgl wxt c pnvmgt ftuf nst
23+
itkebo txbs buf vswo dnp n ud f w irl y n ws apucvydjpnlevdqk wsm tyync
24+
wzdf bxakzyg u icqcwxdrudwrmpj ak edkikxefiqe cpsait gcd q mqerlcdkui
25+
hbzyiu qj hw ryco bykno joopffsgn xim uk tldtu gtyog d rtjm xbngxxv hoi q
26+
pes h yfdvd padbudt pzg f tymur pohb ubzh c nqwtvtq k x zrcw a rwufxbaw
27+
ofrxjrjgw mxfm pofqpvfxixr f v i wt myguklddyle a siroz uc j nvfaxjkx oc
28+
yscu qwbe nak wef kv d g"
29+
30+
"'test'"
31+
99 + 1
32+
"test"
33+
'test"ji"' # comment
34+
1

0 commit comments

Comments
 (0)