-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patht-file-grep-vs-match-in-memory.sh
More file actions
executable file
·168 lines (144 loc) · 3.08 KB
/
t-file-grep-vs-match-in-memory.sh
File metadata and controls
executable file
·168 lines (144 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#! /usr/bin/env bash
#
# Q: To search file for matches: in memory search vs `grep`
# A: It is about 8-10x faster to read file into memory and then do matching
# priority: 10
#
# t1a real 0m0.049s read once + bash regexp (read file once + use loop)
# t1b real 0m0.054s read once + case..MATCH..esac (read file once + use loop)
# t2 real 0m0.283s grep
# t3 real 0m0.407s read + case..MATCH..esac (separate file calls)
# t4 real 0m0.440s read + bash regexp (separate file calls)
#
# Code:
#
# See the test code for more information. Overview:
#
# t1a read once and loop [[ str =~ RE ]]
# t1b read once and loop case..MATCH..esac
# t2 grep RE file in loop
# t3 read every time in loop. case..MATCH..esac
# t4 read every time in loop. [[ str =~ RE ]]
#
# Notes:
#
# Repeated reads of the same file
# probably utilize the kernel cache to some
# extent. But it is still much faster to
# read the file once into memory and then
# apply matching multiple times.
#
# The `grep` command is leaps ahead of
# re-reading the file in a loop and using
# the shell’s own matching capabilities.
#
# In Ksh, the "read into memory first, then
# match" is extremely fast:
#
# t1a real 0.005 user 0.002 sys 0.002
# t1b real 0.334 user 0.328 sys 0.005
# t2 real 0.221 user 0.126 sys 0.096
# t3 real 0.478 user 0.343 sys 0.137
# t4 real 0.222 user 0.153 sys 0.071
# Load the shared test library.
# NOTE(review): $random_file and $TMPBASE are presumably defined by
# t-lib.sh -- confirm.
. ./t-lib.sh
rand=$random_file

# Scratch file that Setup fills with the test data. The template is
# quoted so that a $TMPBASE value containing whitespace or glob
# characters is passed to mktemp as a single argument.
f=$(mktemp -t "$TMPBASE.random.file.XXX")

# The literal to look for, and the same search expressed
# as a shell glob and as an extended regular expression.
string=abc
pattern="$string*$string"
re="$string.*$string"
# Create the test file $f: a first line "<string> <string>" followed
# by the content of $rand.
#
# Globals: string, rand (read); the file named by f (written)
# Returns: exit status of the group, i.e. of the final cat
Setup ()
{
    # Paths are quoted so that names containing whitespace work.
    {
        printf '%s %s\n' "$string" "$string"
        cat "$rand"
    } > "$f"
}
# Print one "INFO:" line with the "ls -l" listing of the test file $f.
#
# Globals: f (read)
# Outputs: the listing line on stdout
Info ()
{
    # Quoted so that a path containing whitespace is listed as one file.
    echo "INFO: test file: $(ls -l "$f")"
}
# Load the whole content of file $1 into the variable REPLY.
# Command substitution strips trailing newlines from the content.
Read ()
{
    # A builtin alternative exists, but it is not portable across shells:
    #   read -N$((100 * 1024)) REPLY < "$1"
    # Hence the plain POSIX form.
    REPLY="$(cat "$1")"
}
# Read file $1 into memory and test whether the glob $pattern occurs
# anywhere in its content.
#
# Globals: pattern (read), REPLY (written by Read)
# Arguments: $1 - file to search
# Returns: 0 if the content matches, 1 otherwise
MatchFileContentPattern () # POSIX
{
    Read "$1"

    # A case pattern must match the WHOLE word, so the glob is wrapped
    # in "*...*" to search anywhere in the content; this mirrors the
    # unanchored regexp search and the pattern used in t1b.
    # $pattern is left unquoted on purpose so that it acts as a glob.
    case ${REPLY:-} in
        *$pattern*)
            return 0
            ;;
        *)
            return 1
            ;;
    esac
}
# The [[ ... =~ ... ]] syntax is hidden from other shells: they only
# ever parse the no-op stubs below. The real definitions are written
# to a separate file (quoted here-document, so nothing is expanded at
# write time), and that file is sourced only when IsFeatureMatchRegexp
# succeeds.
MatchFileContentRegexp () { : ; } # stub
t1a () { : ; } # stub

cat << 'EOF' > t.bash
# Read file $1 into memory and match its content against regexp $re.
# Returns the status of the [[ ]] test.
MatchFileContentRegexp () # Bash regexp
{
    Read "$1"
    [[ "$REPLY" =~ $re ]]
}

# Read file $f once, then apply the regexp match $loop_max times.
t1a () # read once
{
    Read "$f"

    # Match against $string directly. Assigning it to the global
    # $re would silently change the regexp that MatchFileContentRegexp
    # (t4) uses when it runs later in the same shell.
    for i in $(seq $loop_max)
    do
        [[ $REPLY =~ $string ]]
    done
}
EOF

IsFeatureMatchRegexp && . ./t.bash
rm --force t.bash
# Test case: load file $f into memory once, then run the glob
# match "*$pattern*" against the content $loop_max times.
t1b () # read once
{
    Read "$f"

    for i in $(seq $loop_max); do
        # Only the matching work matters here; the result is discarded.
        case "${REPLY-}" in *$pattern*) : ;; esac
    done
}
# Test case: run grep on file $f with the extended regexp $re,
# $loop_max times. The file is read again by grep on every iteration.
#
# Globals: loop_max, re, f (read)
# Returns: exit status of the last grep
t2 () # read every time
{
    for i in $(seq $loop_max)
    do
        # "grep -E" is the one that is typically used.
        # The file name is quoted so that a path with whitespace works.
        grep --quiet --extended-regexp --files-with-matches "$re" "$f"
    done
}
# Test case: call MatchFileContentPattern on file $f $loop_max times.
# The file is read again on every iteration.
#
# Globals: loop_max, f (read)
# Returns: status of the last MatchFileContentPattern call
t3 () # read every time
{
    for i in $(seq $loop_max)
    do
        # Quoted so that the path always arrives as a single argument.
        MatchFileContentPattern "$f"
    done
}
# Test case: call MatchFileContentRegexp on file $f $loop_max times.
# The file is read again on every iteration.
#
# Globals: loop_max, f (read)
# Returns: status of the last MatchFileContentRegexp call
t4 () # read every time
{
    for i in $(seq $loop_max)
    do
        # Quoted so that the path always arrives as a single argument.
        MatchFileContentRegexp "$f"
    done
}
# Test table handed to RunTests: one ":t <test case> [<condition>]"
# entry per line.
# NOTE(review): the second column looks like a prerequisite that
# RunTests checks before running the test case -- confirm in t-lib.sh.
t="\
:t t1a IsFeatureMatchRegexp
:t t1b
:t t2
:t t3
:t t4 IsFeatureMatchRegexp
"

SetupTrapAtExit
Setup

# Dispatch on the variables $source and $run:
#   $source non-empty -> do nothing more (definitions only)
#   $run non-empty    -> execute the command line given as arguments
#   otherwise         -> run the whole test table
# NOTE(review): both variables are presumably set by t-lib.sh -- confirm.
if [ -z "$source" ]; then
    if [ -n "$run" ]; then
        "$@"
    else
        RunTests "$t" "$@"
    fi
fi
# End of file