Skip to content

Commit 573ed00

Browse files
committed
Benchmark tweaks, CPS parsing monad, parse fixes
Benchmark only the actual parsing (not opening/reading of source), and don't add timing benchmarks to the log if they are severely inflated. Note that inflated benchmarks can also be caused by doing other stuff while benchmarking. This closes #23. Changed ParseMonad to use a CPS parser. This should let me look to parsec and attoparsec for performance ideas (since they take the same approach). Fixed some parsing bugs: * multiline strings * the string "\\" * expressions in statements that start with blocks can be turned into regular semicolon-terminated expressions if the block ends in a postfix operator (ex: `{ x }? + 1;`) * same point as before, but for block-like things in the arms of match clauses. These last two turned out to be tricky - I now need to export patterns under a new production and not export type parameter bounds. I'm not sure why; this all seems arbitrary. To investigate when I have time...
1 parent ba82465 commit 573ed00

File tree

12 files changed

+234
-64
lines changed

12 files changed

+234
-64
lines changed

benchmarks/allocation-benchmarks/Main.hs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
{-# LANGUAGE OverloadedStrings #-}
1+
{-# LANGUAGE OverloadedStrings, TypeApplications #-}
22

33
import Weigh
44

55
import Control.Monad (filterM)
6-
import Data.Foldable (traverse_)
6+
import Data.Foldable (for_)
7+
import Data.Traversable (for)
78
import GHC.Exts (fromString)
89

9-
import Language.Rust.Parser (parseSourceFile')
10+
import Language.Rust.Syntax (SourceFile)
11+
import Language.Rust.Parser (readInputStream, Span, parse')
1012

1113
import System.Directory (getCurrentDirectory, listDirectory, createDirectoryIfMissing, doesFileExist)
1214
import System.FilePath ((</>), (<.>), takeFileName)
@@ -33,7 +35,9 @@ main = do
3335
files <- filterM doesFileExist entries
3436

3537
-- Run 'weigh' tests
36-
let weigh = setColumns [ Case, Max, Allocated, GCs, Live ] >> traverse_ (\f -> io (takeFileName f) parseSourceFile' f) files
38+
fileStreams <- for files $ \file -> do { is <- readInputStream file; pure (takeFileName file, is) }
39+
let weigh = do setColumns [ Case, Max, Allocated, GCs, Live ]
40+
for_ fileStreams $ \(file,is) -> func file (parse' @(SourceFile Span)) is
3741
mainWith weigh
3842
(wr, _) <- weighResults weigh
3943
let results = object [ case maybeErr of
@@ -50,5 +54,6 @@ main = do
5054
-- Save the output to JSON
5155
createDirectoryIfMissing False (workingDirectory </> "allocations")
5256
let logFile = workingDirectory </> "allocations" </> logFileName <.> "json"
57+
putStrLn $ "writing results to: " ++ logFile
5358
logFile `BL.writeFile` encode results
5459

benchmarks/timing-benchmarks/Main.hs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1-
{-# LANGUAGE OverloadedStrings #-}
1+
{-# LANGUAGE OverloadedStrings, TypeApplications #-}
22

33
import Criterion
4-
import Criterion.Types (anMean, reportAnalysis)
4+
import Criterion.Main (defaultConfig)
5+
import Criterion.Types (anMean, reportAnalysis, timeLimit, anOutlierVar, ovEffect, OutlierEffect(Severe))
56
import Statistics.Resampling.Bootstrap (Estimate(..))
67

78
import Control.Monad (filterM)
89
import Data.Traversable (for)
910
import GHC.Exts (fromString)
1011

11-
import Language.Rust.Parser (parseSourceFile')
12+
import Language.Rust.Syntax (SourceFile)
13+
import Language.Rust.Parser (readInputStream, Span, parse')
1214

1315
import System.Directory (getCurrentDirectory, listDirectory, createDirectoryIfMissing, doesFileExist)
1416
import System.FilePath ((</>), (<.>), takeFileName)
@@ -35,18 +37,21 @@ main = do
3537
reports <- for files $ \f -> do
3638
let name = takeFileName f
3739
putStrLn name
38-
bnch <- benchmark' (nfIO (parseSourceFile' f))
40+
is <- readInputStream f
41+
bnch <- benchmarkWith' defaultConfig{ timeLimit = 15 } (nf (parse' @(SourceFile Span)) is)
3942
pure (name, bnch)
4043
let results = object [ fromString name .= object [ "mean" .= m
4144
, "lower bound" .= l
4245
, "upper bound" .= u
4346
]
4447
| (name,report) <- reports
4548
, let Estimate m l u _ = anMean (reportAnalysis report)
49+
, ovEffect (anOutlierVar (reportAnalysis report)) /= Severe
4650
]
4751

4852
-- Save the output to JSON
4953
createDirectoryIfMissing False (workingDirectory </> "timings")
5054
let logFile = workingDirectory </> "timings" </> logFileName <.> "json"
55+
putStrLn $ "writing results to: " ++ logFile
5156
logFile `BL.writeFile` encode results
5257

sample-sources/expressions.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ fn main() {
4343
let x = foo()?;
4444
return 0;
4545
return;
46+
47+
match true {
48+
true => move | | { 1 },
49+
false => | | { 2}
50+
}
4651
}
4752

4853
fn precedences() {

sample-sources/literals.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,21 @@ fn main() {
1818

1919
// Strings
2020
"hello world";
21+
"hello
22+
world";
23+
"hello\
24+
world";
2125
r"hello world";
26+
r"hello
27+
world";
28+
r"hello\
29+
world";
2230
r#"hello " world"#;
2331
b"hello world";
32+
b"hello
33+
world";
34+
b"hello\
35+
world";
2436
br"hello world";
2537
br#"hello " world"#;
2638

sample-sources/precedences.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ fn range_expressions() {
4646
return |x: i32| x + 1;
4747
box 1 + 2;
4848
|x| { 1 };
49+
x || y();
50+
x && y();
51+
52+
// block expression / starting with block statement
53+
{ 1 }?.0 + 1;
54+
if true { 1 } else { 2 }.toString();
55+
if true { 1 }.toString();
56+
{ x }[2]?.foo * { 3 };
4957

5058
// no struct expression
5159
for x in 1.. { }
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
fn main() {
2+
3+
{ 1 }[1]; // !!! Parses as { 1 }; [1];
4+
{ 1 }(0); // !!! Parses as { 1 }; (0);
5+
6+
{ 1 }.foo; // Parses as a field access
7+
{ 1 }.foo(0); // Parses as a method call
8+
{ 1 }.0; // Parses as a tup field access
9+
{ 1 }?; // Parses as a try
10+
11+
12+
{ 1 }? + 1; // SHOULD WORK
13+
{ 1 }[0] + 1; // SHOULD WORK
14+
{ 1 }(0,1,2) + 1; // SHOULD WORK
15+
{ 1 }.foo(0,1,2) + 1; // SHOULD WORK
16+
{ 1 }.foo + 1; // SHOULD WORK
17+
{ 1 }.0 + 1; // SHOULD WORK
18+
19+
// { 1 } as i32 + 1; // SHOULD NOT WORK
20+
// { 1 } + 1; // SHOULD NOT WORK
21+
22+
{ 1 }[1];
23+
{ 1 }();
24+
{ 1 }.bar;
25+
{ 1 }.bar();
26+
{ 1 }.0;
27+
28+
29+
if true { 1 } [1];
30+
if true { 1 } ();
31+
if true { 1 } .bar;
32+
if true { 1 } .bar();
33+
34+
if true { 1 } else { 2 }[1];
35+
if true { 1 } else { 2 }();
36+
if true { 1 } else { 2 }.bar;
37+
if true { 1 } else { 2 }.bar();
38+
39+
40+
loop { 1 } [1];
41+
loop { 1 } ();
42+
loop { 1 } .bar;
43+
loop { 1 } .bar();
44+
45+
match true {
46+
// true => { 1 } + 2, // SHOULD NOT WORK
47+
true => { 1 }? + 2, // SHOULD WORK
48+
false => 1,
49+
true => move | | { 1 },
50+
false => 1
51+
}
52+
}

src/Language/Rust/Parser.hs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ instance Parse TokenTree where parser = parseTt
8585
instance Parse (Block Span) where parser = parseBlock
8686
instance Parse (ImplItem Span) where parser = parseImplItem
8787
instance Parse (TraitItem Span) where parser = parseTraitItem
88-
instance Parse (TyParamBound Span) where parser = parseTyParamBound
8988
instance Parse (TyParam Span) where parser = parseTyParam
9089
instance Parse (LifetimeDef Span) where parser = parseLifetimeDef
9190
instance Parse (Generics Span) where parser = parseGenerics

src/Language/Rust/Parser/Internal.y

Lines changed: 102 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ To get information about transition states and such, run
2828

2929
module Language.Rust.Parser.Internal (
3030
parseLit, parseAttr, parseTy, parsePat, parseStmt, parseExpr, parseItem, parseSourceFile,
31-
parseBlock, parseImplItem, parseTraitItem, parseTt, parseTyParamBound, parseTyParam,
31+
parseBlock, parseImplItem, parseTraitItem, parseTt, parseTyParam,
3232
parseGenerics, parseWhereClause, parseLifetimeDef, parseArg
3333
) where
3434

@@ -48,7 +48,7 @@ import Text.Read (readMaybe)
4848
%name parseLit lit
4949
%name parseAttr attribute
5050
%name parseTy ty_general
51-
%name parsePat pat
51+
%name parsePat export_pat
5252
%name parseArg export_arg
5353
%name parseStmt stmt
5454
%name parseExpr expr
@@ -58,7 +58,6 @@ import Text.Read (readMaybe)
5858
%name parseImplItem impl_item
5959
%name parseTraitItem trait_item
6060
%name parseTt token_tree
61-
%name parseTyParamBound ty_param_bound_mod
6261
%name parseTyParam ty_param
6362
%name parseLifetimeDef lifetime_def
6463
%name parseWhereClause where_clause
@@ -1065,15 +1064,12 @@ comma_arms :: { [Arm Span] }
10651064

10661065
-- An expression followed by match arms. If there is a comma needed, it is added
10671066
expr_arms :: { (Expr Span, [Arm Span]) }
1068-
: gen_expression(nonblock_expr,expr,expr) comma_arms { ($1, $2) }
1069-
| paren_expr comma_arms { ($1, $2) }
1070-
| struct_expr comma_arms { ($1, $2) }
1067+
: nonblock_expr comma_arms { ($1, $2) }
1068+
| postfix_block_expr comma_arms { ($1, $2) }
1069+
| vis_safety_block comma_arms { ($1, $2) }
1070+
| vis_safety_block arms { ($1, $2) }
10711071
| block_like_expr comma_arms { ($1, $2) }
1072-
| inner_attrs_block comma_arms { let (as,b) = $1 in (BlockExpr as b (spanOf b), $2) }
1073-
| inner_attrs_block arms { let (as,b) = $1 in (BlockExpr as b (spanOf b), $2) }
1074-
| unsafe inner_attrs_block comma_arms
1075-
{ let (as, Block ss r x) = $2 in (BlockExpr as (Block ss Unsafe ($1 # x)) ($1 # x), $3) }
1076-
1072+
| block_like_expr arms { ($1, $2) }
10771073

10781074
-- As per https://github.com/rust-lang/rust/issues/15701 (as of March 10 2017), the only way to have
10791075
-- attributes on expressions should be with inner attributes on a paren expression.
@@ -1123,24 +1119,104 @@ field :: { Field Span }
11231119
----------------
11241120
-- Statements --
11251121
----------------
1122+
-- Postfix expressions that can come after an expression block, in a 'stmt'
1123+
--
1124+
-- * `{ 1 }[0]` isn't here because it is treated as `{ 1 }; [0]`
1125+
-- * `{ 1 }(0)` isn't here because it is treated as `{ 1 }; (0)`
1126+
--
1127+
postfix_block(lhs) :: { Expr Span }
1128+
-- postfix expressions
1129+
: lhs '?' { Try [] $1 ($1 # $>) }
1130+
| lhs '.' ident %prec FIELD { FieldAccess [] $1 (unspan $3) ($1 # $>) }
1131+
| lhs '.' ident '(' sep_byT(expr,',') ')'
1132+
{ MethodCall [] $1 (unspan $3) Nothing $5 ($1 # $>) }
1133+
| lhs '.' ident '::' '<' sep_byT(ty,',') '>' '(' sep_byT(expr,',') ')'
1134+
{ MethodCall [] $1 (unspan $3) (Just $6) $9 ($1 # $>) }
1135+
| lhs '.' int {%
1136+
case lit $3 of
1137+
Int Dec i Unsuffixed _ -> pure (TupField [] $1 (fromIntegral i) ($1 # $3))
1138+
_ -> parseError $3
1139+
}
1140+
1141+
gen_expression_block(lhs,rhs,rhs2) :: { Expr Span }
1142+
: lhs '?' { Try [] $1 ($1 # $>) }
1143+
| lhs '[' expr ']' { Index [] $1 $3 ($1 # $>) }
1144+
| lhs '(' sep_byT(expr,',') ')' { Call [] $1 $3 ($1 # $>) }
1145+
| lhs '.' ident %prec FIELD { FieldAccess [] $1 (unspan $3) ($1 # $>) }
1146+
| lhs '.' ident '(' sep_byT(expr,',') ')'
1147+
{ MethodCall [] $1 (unspan $3) Nothing $5 ($1 # $>) }
1148+
| lhs '.' ident '::' '<' sep_byT(ty,',') '>' '(' sep_byT(expr,',') ')'
1149+
{ MethodCall [] $1 (unspan $3) (Just $6) $9 ($1 # $>) }
1150+
| lhs '.' int {%
1151+
case lit $3 of
1152+
Int Dec i Unsuffixed _ -> pure (TupField [] $1 (fromIntegral i) ($1 # $3))
1153+
_ -> parseError $3
1154+
}
1155+
-- unary expressions
1156+
| lhs ':' ty_no_plus { TypeAscription [] $1 $3 ($1 # $>) }
1157+
| lhs as ty_no_plus { Cast [] $1 $3 ($1 # $>) }
1158+
-- binary expressions
1159+
| lhs '*' rhs { Binary [] MulOp $1 $3 ($1 # $>) }
1160+
| lhs '/' rhs { Binary [] DivOp $1 $3 ($1 # $>) }
1161+
| lhs '%' rhs { Binary [] RemOp $1 $3 ($1 # $>) }
1162+
| lhs '+' rhs { Binary [] AddOp $1 $3 ($1 # $>) }
1163+
| lhs '-' rhs { Binary [] SubOp $1 $3 ($1 # $>) }
1164+
| lhs '<<' rhs { Binary [] ShlOp $1 $3 ($1 # $>) }
1165+
| lhs '>>' rhs { Binary [] ShrOp $1 $3 ($1 # $>) }
1166+
| lhs '&' rhs { Binary [] BitAndOp $1 $3 ($1 # $>) }
1167+
| lhs '^' rhs { Binary [] BitXorOp $1 $3 ($1 # $>) }
1168+
| lhs '|' rhs { Binary [] BitOrOp $1 $3 ($1 # $>) }
1169+
| lhs '==' rhs { Binary [] EqOp $1 $3 ($1 # $>) }
1170+
| lhs '!=' rhs { Binary [] NeOp $1 $3 ($1 # $>) }
1171+
| lhs '<' rhs { Binary [] LtOp $1 $3 ($1 # $>) }
1172+
| lhs '>' rhs { Binary [] GtOp $1 $3 ($1 # $>) }
1173+
| lhs '<=' rhs { Binary [] LeOp $1 $3 ($1 # $>) }
1174+
| lhs '>=' rhs { Binary [] GeOp $1 $3 ($1 # $>) }
1175+
| lhs '&&' rhs { Binary [] AndOp $1 $3 ($1 # $>) }
1176+
| lhs '||' rhs { Binary [] OrOp $1 $3 ($1 # $>) }
1177+
-- range expressions
1178+
| lhs '..' %prec POSTFIXRNG { Range [] (Just $1) Nothing HalfOpen ($1 # $>) }
1179+
| lhs '...' %prec POSTFIXRNG { Range [] (Just $1) Nothing Closed ($1 # $>) }
1180+
| lhs '..' rhs2 %prec INFIXRNG { Range [] (Just $1) (Just $3) HalfOpen ($1 # $>) }
1181+
| lhs '...' rhs2 %prec INFIXRNG { Range [] (Just $1) (Just $3) Closed ($1 # $>) }
1182+
-- assignment expressions
1183+
| lhs '<-' rhs { InPlace [] $1 $3 ($1 # $>) }
1184+
| lhs '=' rhs { Assign [] $1 $3 ($1 # $>) }
1185+
| lhs '>>=' rhs { AssignOp [] ShrOp $1 $3 ($1 # $>) }
1186+
| lhs '<<=' rhs { AssignOp [] ShlOp $1 $3 ($1 # $>) }
1187+
| lhs '-=' rhs { AssignOp [] SubOp $1 $3 ($1 # $>) }
1188+
| lhs '+=' rhs { AssignOp [] AddOp $1 $3 ($1 # $>) }
1189+
| lhs '*=' rhs { AssignOp [] MulOp $1 $3 ($1 # $>) }
1190+
| lhs '/=' rhs { AssignOp [] DivOp $1 $3 ($1 # $>) }
1191+
| lhs '^=' rhs { AssignOp [] BitXorOp $1 $3 ($1 # $>) }
1192+
| lhs '|=' rhs { AssignOp [] BitOrOp $1 $3 ($1 # $>) }
1193+
| lhs '&=' rhs { AssignOp [] BitAndOp $1 $3 ($1 # $>) }
1194+
| lhs '%=' rhs { AssignOp [] RemOp $1 $3 ($1 # $>) }
1195+
1196+
1197+
postfix_block_expr :: { Expr Span }
1198+
: postfix_block(block_like_expr) { $1 }
1199+
| postfix_block(vis_safety_block) { $1 }
1200+
| gen_expression_block(postfix_block_expr,expr,expr) { $1 }
1201+
1202+
vis_safety_block :: { Expr Span }
1203+
: pub_or_inherited safety inner_attrs_block {%
1204+
let (as, Block ss r x) = $3
1205+
e = BlockExpr as (Block ss (unspan $2) ($2 # x)) ($2 # x)
1206+
in noVis $1 e
1207+
}
1208+
11261209
11271210
stmt :: { Stmt Span }
11281211
: ntStmt { $1 }
11291212
| many(outer_attribute) let pat ':' ty initializer ';' { Local $3 (Just $5) $6 $1 ($1 # $2 # $>) }
11301213
| many(outer_attribute) let pat initializer ';' { Local $3 Nothing $4 $1 ($1 # $2 # $>) }
11311214
| many(outer_attribute) nonblock_expr ';' { toStmt ($1 `addAttrs` $2) True False ($1 # $2 # $3) }
11321215
| many(outer_attribute) block_like_expr ';' { toStmt ($1 `addAttrs` $2) True True ($1 # $2 # $3) }
1216+
| many(outer_attribute) postfix_block_expr ';' { toStmt ($1 `addAttrs` $2) True True ($1 # $2 # $3) }
11331217
| many(outer_attribute) block_like_expr %prec NOSEMI { toStmt ($1 `addAttrs` $2) False True ($1 # $2) }
1134-
| many(outer_attribute) pub_or_inherited safety inner_attrs_block ';' {%
1135-
let (as, Block ss r x) = $4
1136-
e = BlockExpr ($1 ++ as) (Block ss (unspan $3) ($3 # x)) ($3 # x)
1137-
in noVis $2 (toStmt e True True ($1 # e # $>))
1138-
}
1139-
| many(outer_attribute) pub_or_inherited safety inner_attrs_block %prec NOSEMI {%
1140-
let (as, Block ss r x) = $4
1141-
e = BlockExpr ($1 ++ as) (Block ss (unspan $3) ($3 # x)) ($3 # x)
1142-
in noVis $2 (toStmt e False True ($1 # e))
1143-
}
1218+
| many(outer_attribute) vis_safety_block ';' { toStmt ($1 `addAttrs` $2) True True ($1 # $2 # $>) }
1219+
| many(outer_attribute) vis_safety_block %prec NOSEMI { toStmt ($1 `addAttrs` $2) False True ($1 # $2) }
11441220
| gen_item(pub_or_inherited) { ItemStmt $1 (spanOf $1) }
11451221
| many(outer_attribute) expr_path '!' ident '[' many(token_tree) ']' ';'
11461222
{ ItemStmt (macroItem $1 (Just (unspan $4)) (Mac $2 $6 ($2 # $>)) ($1 # $2 # $>)) ($1 # $2 # $>) }
@@ -1162,6 +1238,7 @@ stmts_possibly_no_semi :: { [Maybe (Stmt Span)] }
11621238
: stmtOrSemi stmts_possibly_no_semi { $1 : $2 }
11631239
| stmtOrSemi { [$1] }
11641240
| many(outer_attribute) nonblock_expr { [Just (toStmt ($1 `addAttrs` $2) False False ($1 # $2))] }
1241+
| many(outer_attribute) postfix_block_expr { [Just (toStmt ($1 `addAttrs` $2) False True ($1 # $2))] }
11651242
11661243
initializer :: { Maybe (Expr Span) }
11671244
: '=' expr { Just $2 }
@@ -1570,6 +1647,10 @@ export_arg :: { Arg Span }
15701647
: arg_general { $1 }
15711648
| arg_self { $1 }
15721649

1650+
-- Exporting 'pat' directly screws up expressions like 'x && y()'
1651+
export_pat :: { Pat Span }
1652+
: pat { $1 }
1653+
15731654
{
15741655
-- | Parser for literals.
15751656
parseLit :: P (Lit Span)
@@ -1610,9 +1691,6 @@ parseTt :: P TokenTree
16101691
-- | Parser for lifetime definitions
16111692
parseLifetimeDef :: P (LifetimeDef Span)
16121693

1613-
-- | Parser for type parameter bound
1614-
parseTyParamBound :: P (TyParamBound Span)
1615-
16161694
-- | Parser for a type parameter
16171695
parseTyParam :: P (TyParam Span)
16181696

src/Language/Rust/Parser/Lexer.x

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,7 @@ $hexit = [0-9a-fA-F]
956956
@lit_float = ( 0 @decimal_suffix | ( [1-9][0-9_]* | 0[0-9_]+ ) @decimal_suffix? ) @exponent_suffix?
957957
@lit_float2 = [0-9][0-9_]* \.
958958

959-
@lit_str = \" (\\\n | \\\r\n | \\ @char_escape | [^\"])* \"
959+
@lit_str = \" (\\\n | \\\r\n | \\ @char_escape | [^\\\"] | \n | \r)* \"
960960
@lit_byte_str = b @lit_str
961961

962962
@lit_raw_str = r \#* \"

0 commit comments

Comments
 (0)