|
| 1 | +# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. |
| 2 | +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 3 | +# |
| 4 | +# The Universal Permissive License (UPL), Version 1.0 |
| 5 | +# |
| 6 | +# Subject to the condition set forth below, permission is hereby granted to any |
| 7 | +# person obtaining a copy of this software, associated documentation and/or |
| 8 | +# data (collectively the "Software"), free of charge and under any and all |
| 9 | +# copyright rights in the Software, and any and all patent rights owned or |
| 10 | +# freely licensable by each licensor hereunder covering either (i) the |
| 11 | +# unmodified Software as contributed to or provided by such licensor, or (ii) |
| 12 | +# the Larger Works (as defined below), to deal in both |
| 13 | +# |
| 14 | +# (a) the Software, and |
| 15 | +# |
| 16 | +# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if |
| 17 | +# one is included with the Software each a "Larger Work" to which the Software |
| 18 | +# is contributed by such licensors), |
| 19 | +# |
| 20 | +# without restriction, including without limitation the rights to copy, create |
| 21 | +# derivative works of, display, perform, and distribute the Software and make, |
| 22 | +# use, sell, offer for sale, import, export, have made, and have sold the |
| 23 | +# Software and the Larger Work(s), and to sublicense the foregoing rights on |
| 24 | +# either these or other terms. |
| 25 | +# |
| 26 | +# This license is subject to the following condition: |
| 27 | +# |
| 28 | +# The above copyright notice and either this complete permission notice or at a |
| 29 | +# minimum a reference to the UPL must be included in all copies or substantial |
| 30 | +# portions of the Software. |
| 31 | +# |
| 32 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 33 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 34 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 35 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 36 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 37 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 38 | +# SOFTWARE. |
| 39 | + |
| 40 | +from multiprocessing import Pool |
| 41 | +from time import time |
| 42 | +from itertools import permutations |
| 43 | +from pprint import pprint |
| 44 | + |
| 45 | + |
NGRAMS = 4  # number of consecutive whitespace-separated tokens per n-gram


def _ngram_set(path):
    """Return the set of distinct NGRAMS-token tuples in the file at *path*."""
    with open(path) as f:
        tokens = f.read().split()
    # Zipping NGRAMS progressively shifted views of the token list yields the
    # sliding windows; zip() stops at the shortest view, so a file with fewer
    # than NGRAMS tokens produces an empty set.
    return set(zip(*(tokens[i:] for i in range(NGRAMS))))


def jaccard(fileA, fileB):
    """Compute the Jaccard similarity of the token n-gram sets of two files.

    Each file is read as text, split on whitespace, and turned into the set
    of its NGRAMS-token sliding windows.

    Args:
        fileA: Path of the first file.
        fileB: Path of the second file.

    Returns:
        A tuple ``(intersection_size, union_size, similarity)`` where
        ``similarity`` is ``intersection_size / union_size``. When neither
        file yields any n-gram the union is empty and ``(0, 0, 0.0)`` is
        returned instead of raising ``ZeroDivisionError``.

    Raises:
        OSError: If either file cannot be opened or read.
    """
    x = _ngram_set(fileA)
    y = _ngram_set(fileB)
    intersection_cardinality = len(x & y)
    union_cardinality = len(x | y)
    if union_cardinality == 0:
        # Nothing to compare: report "no measurable overlap" rather than
        # dividing by zero.
        return (0, 0, 0.0)
    return (intersection_cardinality, union_cardinality, intersection_cardinality / union_cardinality)
| 59 | + |
| 60 | + |
def parallel_jaccard(pair):
    """Pool-friendly wrapper: score one pair of file paths.

    Args:
        pair: Iterable of arguments unpacked into ``jaccard`` (the two file
            paths to compare).

    Returns:
        A tuple ``(pair, result)`` so the caller can tell which input a
        result belongs to when results arrive out of order.
    """
    scores = jaccard(*pair)
    return pair, scores
| 63 | + |
| 64 | + |
if __name__ == '__main__':
    # Benchmark driver: compares every ordered pair of files in the directory
    # given as the first command-line argument, first serially (warmup), then
    # repeatedly through a process pool.
    import os
    import sys

    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <directory>")

    iterations = 50
    directory = sys.argv[1]
    # Keep only regular files (sub-directories would make open() fail inside
    # jaccard) and sort them so the pair order is reproducible across runs.
    files = sorted(
        path
        for path in (os.path.join(directory, name) for name in os.listdir(directory))
        if os.path.isfile(path)
    )
    pairs = list(permutations(files, 2))

    # Serial warmup passes in the main process before the measured runs.
    for i in range(4):
        print(f"Warmup run #{i}")
        start = time()
        for res in map(parallel_jaccard, pairs):
            print(f"\t{time() - start}s => {res[0]}: {res[1]}", flush=True)

    # Measured runs: one pool is reused for all iterations; results are
    # printed as they complete, so their order may differ from `pairs`.
    with Pool(8) as p:
        for i in range(iterations):
            start = time()
            print(f"#{i + 1}:", end="", flush=True)
            for res in p.imap_unordered(parallel_jaccard, pairs):
                print(f"\t{time() - start}s => {res[0]}: {res[1]}", flush=True)
0 commit comments