Skip to content

Commit 7cecfbb

Browse files
committed
add my benchmark for local execution
1 parent 8fa489d commit 7cecfbb

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
2+
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3+
#
4+
# The Universal Permissive License (UPL), Version 1.0
5+
#
6+
# Subject to the condition set forth below, permission is hereby granted to any
7+
# person obtaining a copy of this software, associated documentation and/or
8+
# data (collectively the "Software"), free of charge and under any and all
9+
# copyright rights in the Software, and any and all patent rights owned or
10+
# freely licensable by each licensor hereunder covering either (i) the
11+
# unmodified Software as contributed to or provided by such licensor, or (ii)
12+
# the Larger Works (as defined below), to deal in both
13+
#
14+
# (a) the Software, and
15+
#
16+
# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
17+
# one is included with the Software each a "Larger Work" to which the Software
18+
# is contributed by such licensors),
19+
#
20+
# without restriction, including without limitation the rights to copy, create
21+
# derivative works of, display, perform, and distribute the Software and make,
22+
# use, sell, offer for sale, import, export, have made, and have sold the
23+
# Software and the Larger Work(s), and to sublicense the foregoing rights on
24+
# either these or other terms.
25+
#
26+
# This license is subject to the following condition:
27+
#
28+
# The above copyright notice and either this complete permission notice or at a
29+
# minimum a reference to the UPL must be included in all copies or substantial
30+
# portions of the Software.
31+
#
32+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38+
# SOFTWARE.
39+
40+
from multiprocessing import Pool
41+
from time import time
42+
from itertools import permutations
43+
from pprint import pprint
44+
45+
46+
# Number of consecutive whitespace-separated tokens that make up one n-gram
# when jaccard() compares two files.
NGRAMS = 4
47+
48+
49+
def _ngram_set(path, size):
    """Return the set of distinct `size`-token n-grams in the file at `path`."""
    with open(path) as handle:
        tokens = handle.read().split()
    # Zipping `size` progressively shifted views of the token list yields every
    # run of `size` consecutive tokens. zip() stops at the shortest view, so a
    # file with fewer than `size` tokens produces an empty set.
    return set(zip(*(tokens[offset:] for offset in range(size))))


def jaccard(fileA, fileB, ngrams=None):
    """Compute the Jaccard similarity of two text files over token n-grams.

    Each file is read in full, split on whitespace, and turned into the set
    of its distinct n-grams (tuples of `ngrams` consecutive tokens).

    Args:
        fileA: Path of the first file.
        fileB: Path of the second file.
        ngrams: Tokens per n-gram. Defaults to the module-level ``NGRAMS``
            when omitted or ``None``.

    Returns:
        A tuple ``(intersection_size, union_size, similarity)`` where
        ``similarity`` is ``intersection_size / union_size``. When neither
        file contains a complete n-gram the union is empty and the result is
        ``(0, 0, 0.0)`` instead of a division by zero.

    Raises:
        ValueError: If the n-gram size is smaller than 1.
        OSError: If either file cannot be opened or read.
    """
    if ngrams is None:
        ngrams = NGRAMS
    if ngrams < 1:
        raise ValueError(f"n-gram size must be at least 1, got {ngrams}")

    x = _ngram_set(fileA, ngrams)
    y = _ngram_set(fileB, ngrams)
    intersection_cardinality = len(x & y)
    union_cardinality = len(x | y)
    if union_cardinality == 0:
        # Both files are too short to contain a single n-gram; treat them as
        # sharing nothing rather than crashing the caller.
        return (0, 0, 0.0)
    return (
        intersection_cardinality,
        union_cardinality,
        intersection_cardinality / union_cardinality,
    )
59+
60+
61+
def parallel_jaccard(pair):
    """Score one pair of file paths and hand the pair back with its result.

    Takes a single argument so it can be used directly as the callable for
    ``map`` / ``Pool.imap_unordered``; returning the pair alongside the score
    lets the caller tell which result belongs to which input.

    Args:
        pair: An iterable of arguments that is unpacked into ``jaccard``.

    Returns:
        A tuple ``(pair, jaccard_result)``.
    """
    scores = jaccard(*pair)
    return pair, scores
63+
64+
65+
if __name__ == '__main__':
    # Benchmark driver: compares every ordered pair of files in the directory
    # given as the first command-line argument, first sequentially (warm-up)
    # and then repeatedly through a process pool.
    import os
    import sys

    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <directory-of-text-files>")

    iterations = 50
    corpus_dir = sys.argv[1]
    # os.listdir() also returns sub-directories, which jaccard() cannot open,
    # so keep regular files only. Sorting makes the pair order reproducible
    # between runs, since the listing order is otherwise arbitrary.
    files = sorted(
        path
        for path in (os.path.join(corpus_dir, name) for name in os.listdir(corpus_dir))
        if os.path.isfile(path)
    )
    pairs = list(permutations(files, 2))

    # Sequential warm-up passes in the main process.
    for i in range(4):
        print(f"Warmup run #{i}")
        start = time()
        for res in map(parallel_jaccard, pairs):
            print(f"\t{time() - start}s => {res[0]}: {res[1]}", flush=True)

    # Measured passes: the same workload fanned out over 8 worker processes.
    # Results are printed as they complete, each with the time elapsed since
    # the start of the current iteration.
    with Pool(8) as p:
        for i in range(iterations):
            start = time()
            print(f"#{i + 1}:", end="", flush=True)
            for res in p.imap_unordered(parallel_jaccard, pairs):
                print(f"\t{time() - start}s => {res[0]}: {res[1]}", flush=True)

0 commit comments

Comments
 (0)