From dd256591a05bc9b3ad7d7e6045aa861062625463 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 14 Apr 2021 16:20:27 -0400
Subject: [PATCH 1/3] convert BinaryTree to slightly more canonical Julia form

With no node mutation, it is unclear why Node was mutable (which costs
some extra loads), though the recursion forces it onto the heap
regardless. The singleton struct Empty, however, now needs to be
non-mutable for correctness.
---
 src/shootout/binary_trees.jl | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/shootout/binary_trees.jl b/src/shootout/binary_trees.jl
index 281341c4..59fdadf4 100644
--- a/src/shootout/binary_trees.jl
+++ b/src/shootout/binary_trees.jl
@@ -4,17 +4,17 @@
 #
 # Ported from an OCaml version
 
-abstract type BTree end
 
-mutable struct Empty <: BTree
-end
+struct Empty end
 
-mutable struct Node <: BTree
-    info
-    left::BTree
-    right::BTree
+struct Node{T}
+    info::T
+    left::Union{Node{T}, Empty}
+    right::Union{Node{T}, Empty}
 end
 
+const BTree{T} = Union{Node{T}, Empty}
+
 function make(val, d)
     if d == 0
         Node(val, Empty(), Empty())

From 94dba74e7e4af7c123f67188efdf5ed6805fea74 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 14 Apr 2021 16:21:43 -0400
Subject: [PATCH 2/3] use appropriate optimizations in regex_dna

The multi-replace method is being added in https://github.com/JuliaLang/julia/pull/40484
---
 src/shootout/regex_dna.jl | 69 ++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/src/shootout/regex_dna.jl b/src/shootout/regex_dna.jl
index 8e519fde..887d6746 100644
--- a/src/shootout/regex_dna.jl
+++ b/src/shootout/regex_dna.jl
@@ -5,30 +5,30 @@
 # Fix from David Campbell
 
 const variants = [
-      "agggtaaa|tttaccct",
-      "[cgt]gggtaaa|tttaccc[acg]",
-      "a[act]ggtaaa|tttacc[agt]t",
-      "ag[act]gtaaa|tttac[agt]ct",
-      "agg[act]taaa|ttta[agt]cct",
-      "aggg[acg]aaa|ttt[cgt]ccct",
-      "agggt[cgt]aa|tt[acg]accct",
-      "agggta[cgt]a|t[acg]taccct",
-      "agggtaa[cgt]|[acg]ttaccct"
+      r"agggtaaa|tttaccct",
+      r"[cgt]gggtaaa|tttaccc[acg]",
+      r"a[act]ggtaaa|tttacc[agt]t",
+      r"ag[act]gtaaa|tttac[agt]ct",
+      r"agg[act]taaa|ttta[agt]cct",
+      r"aggg[acg]aaa|ttt[cgt]ccct",
+      r"agggt[cgt]aa|tt[acg]accct",
+      r"agggta[cgt]a|t[acg]taccct",
+      r"agggtaa[cgt]|[acg]ttaccct"
 ]
 
-const subs = [
-    (r"B", "(c|g|t)"),
-    (r"D", "(a|g|t)"),
-    (r"H", "(a|c|t)"),
-    (r"K", "(g|t)"),
-    (r"M", "(a|c)"),
-    (r"N", "(a|c|g|t)"),
-    (r"R", "(a|g)"),
-    (r"S", "(c|g)"),
-    (r"V", "(a|c|g)"),
-    (r"W", "(a|t)"),
-    (r"Y", "(c|t)")
-]
+const subs = (
+    ("B" => "(c|g|t)"),
+    ("D" => "(a|g|t)"),
+    ("H" => "(a|c|t)"),
+    ("K" => "(g|t)"),
+    ("M" => "(a|c)"),
+    ("N" => "(a|c|g|t)"),
+    ("R" => "(a|g)"),
+    ("S" => "(c|g)"),
+    ("V" => "(a|c|g)"),
+    ("W" => "(a|t)"),
+    ("Y" => "(c|t)")
+)
 
 function perf_regex_dna()
     infile = joinpath(SHOOTOUT_DATA_PATH, "regexdna-input.txt")
@@ -38,20 +38,29 @@ function perf_regex_dna()
     seq = replace(seq, r">.*\n|\n" => "")
     l2 = length(seq)
 
+    kk = 0
     for v in variants
         k = 0
-        for m in eachmatch(Regex(v), seq)
+        for m in eachmatch(v, seq)
             k += 1
         end
-#        @printf("%s %d\n", v, k)
+        kk += k
     end
 
-    for (u, v) in subs
-        seq = replace(seq, u => v)
+    try
+        # VERSION > 1.7-dev
+        seq = replace(seq, subs...)
+    catch ex
+        ex isa MethodError || rethrow()
+        # semi-optimized regex
+        r = Regex(join(first.(subs), "|"))
+        repl = Dict(subs)
+        seq = replace(seq, r => (r -> repl[r]))
+        ## multiple passes
+        #for sub in subs
+        #    seq = replace(seq, sub)
+        #end
     end
 
-#    println()
-#    println(l1)
-#    println(l2)
-#    println(length(seq))
+    seq, kk
 end

From 9dee1b2cd3f9b3c5e85893414e6855a00c5b7f78 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Wed, 14 Apr 2021 16:25:55 -0400
Subject: [PATCH 3/3] improve quality of perf_parse_json implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building each JSON string character-by-character with ""+c+c+… is bad
style, and the old code was not handling \u correctly either. We could
make strcat a separate micro-benchmark, but it is not a particularly
interesting test case currently.
---
 src/problem/JSONParse.jl | 59 ++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/src/problem/JSONParse.jl b/src/problem/JSONParse.jl
index 574ef139..2b803c0e 100644
--- a/src/problem/JSONParse.jl
+++ b/src/problem/JSONParse.jl
@@ -75,36 +75,53 @@ function perf_parse_json(strng::AbstractString)
         if strng[pos] != '"'
             error("AbstractString starting with quotation expected at position $pos")
         else
-            pos = pos + 1
+            pos += 1
         end
-        str = ""
+        str = IOBuffer()
         while pos <= len
             nc = strng[pos]
             if nc == '"'
+                pos += 1
+                return String(take!(str))
+            elseif nc == '\\'
                 pos = pos + 1
-                return string(str)
-            elseif nc ==  '\\'
-                if pos+1 > len
-                    error_pos("End of file reached right after escape character")
-                end
-                pos = pos + 1
+                pos > len && break # goto error handling
                 anc = strng[pos]
-                if anc == '"' || anc == '\\' || anc == '/'
-                    str = string(str, strng[pos])
-                    pos = pos + 1
-                elseif anc ==  'b' || anc == 'f'|| anc == 'n' || anc == 'r' || anc == 't'
-                    str = string(str, '\\', string[pos])
-                    pos = pos + 1
+                if anc ==  '"'
+                    write(str, "\"")
+                    pos += 1
+                elseif anc ==  '\\'
+                    write(str, "\\")
+                    pos += 1
+                elseif anc ==  '/'
+                    write(str, "/")
+                    pos += 1
+                elseif anc ==  'b'
+                    write(str, "\b")
+                    pos += 1
+                elseif anc ==  'f'
+                    write(str, "\f")
+                    pos += 1
+                elseif anc ==  'n'
+                    write(str, "\n")
+                    pos += 1
+                elseif anc ==  'r'
+                    write(str, "\r")
+                    pos += 1
+                elseif anc ==  't'
+                    write(str, "\t")
+                    pos += 1
                 elseif anc == 'u'
-                    if pos+4 > len
-                        error_pos("End of file reached in escaped unicode character")
-                    end
-                    str = string(str, strng[pos-1:pos+4])
+                    pos + 4 > len && break # goto error handling
+                    write(str, Char(parse(Int, strng[pos+1:pos+4], base=16))) # skip 'u'
                     pos = pos + 5
+                else # should rarely happen
+                    write(str, anc)
+                    pos = pos + 1
                 end
-            else # should never happen
-                str = string(str,strng[pos])
-                pos = pos + 1
+            else # common case
+                write(str, nc)
+                pos = nextind(strng, pos)
             end
         end
         error("End of file while expecting end of string")