|
1 |
| -/* |
2 |
| - Copyright (c) 2013 LDBC |
3 |
| - Linked Data Benchmark Council (http://www.ldbcouncil.org) |
4 |
| - |
5 |
| - This file is part of ldbc_snb_datagen. |
6 |
| - |
7 |
| - ldbc_snb_datagen is free software: you can redistribute it and/or modify |
8 |
| - it under the terms of the GNU General Public License as published by |
9 |
| - the Free Software Foundation, either version 3 of the License, or |
10 |
| - (at your option) any later version. |
11 |
| - |
12 |
| - ldbc_snb_datagen is distributed in the hope that it will be useful, |
13 |
| - but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
| - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 |
| - GNU General Public License for more details. |
16 |
| - |
17 |
| - You should have received a copy of the GNU General Public License |
18 |
| - along with ldbc_snb_datagen. If not, see <http://www.gnu.org/licenses/>. |
19 |
| - |
20 |
| - Copyright (C) 2011 OpenLink Software <[email protected]> |
21 |
| - All Rights Reserved. |
22 |
| - |
23 |
| - This program is free software; you can redistribute it and/or modify |
24 |
| - it under the terms of the GNU General Public License as published by |
25 |
| - the Free Software Foundation; only Version 2 of the License dated |
26 |
| - June 1991. |
27 |
| - |
28 |
| - This program is distributed in the hope that it will be useful, |
29 |
| - but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 |
| - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 |
| - GNU General Public License for more details. |
32 |
| - |
33 |
| - You should have received a copy of the GNU General Public License |
34 |
| - along with this program; if not, write to the Free Software |
35 |
| - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/ |
36 |
| -package ldbc.snb.datagen.dictionary; |
37 |
| - |
38 |
| -import ldbc.snb.datagen.DatagenParams; |
39 |
| - |
40 |
| -import java.io.BufferedReader; |
41 |
| -import java.io.InputStreamReader; |
42 |
| -import java.util.ArrayList; |
43 |
| -import java.util.Random; |
44 |
| -import java.util.TreeMap; |
45 |
| -import java.util.TreeSet; |
46 |
| - |
47 |
| -public class TagMatrix { |
48 |
| - |
49 |
| - private static final String SEPARATOR = " "; |
50 |
| - |
51 |
| - private TreeMap<Integer, ArrayList<Integer>> relatedTags; |
52 |
| - /** |
53 |
| - * < @brief An array of related tags per tag. |
54 |
| - */ |
55 |
| - private TreeMap<Integer, ArrayList<Double>> cumulative; |
56 |
| - |
57 |
| - private ArrayList<Integer> nonZeroTags; |
58 |
| - |
59 |
| - /** |
60 |
| - * < @brief The list of tags. |
61 |
| - */ |
62 |
| - |
63 |
| - public TagMatrix() { |
64 |
| - cumulative = new TreeMap<Integer, ArrayList<Double>>(); |
65 |
| - relatedTags = new TreeMap<Integer, ArrayList<Integer>>(); |
66 |
| - nonZeroTags = new ArrayList<Integer>(); |
67 |
| - load(DatagenParams.tagMatrixFile); |
68 |
| - |
69 |
| - } |
70 |
| - |
71 |
| - /** |
72 |
| - * @param tagMatrixFileName The tag matrix file name. |
73 |
| - * @brief Loads the tag matrix from a file. |
74 |
| - */ |
75 |
| - private void load(String tagMatrixFileName) { |
76 |
| - try { |
77 |
| - BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass() |
78 |
| - .getResourceAsStream(tagMatrixFileName), "UTF-8")); |
79 |
| - String line; |
80 |
| - while ((line = dictionary.readLine()) != null) { |
81 |
| - String data[] = line.split(SEPARATOR); |
82 |
| - int celebrityId = Integer.parseInt(data[0]); |
83 |
| - int topicId = Integer.parseInt(data[1]); |
84 |
| - double cumuluative = Double.parseDouble(data[2]); |
85 |
| - ArrayList<Double> cum = cumulative.get(celebrityId); |
86 |
| - if (cum == null) cumulative.put(celebrityId, new ArrayList<Double>()); |
87 |
| - cumulative.get(celebrityId).add(cumuluative); |
88 |
| - ArrayList<Integer> related = relatedTags.get(celebrityId); |
89 |
| - if (related == null) relatedTags.put(celebrityId, new ArrayList<Integer>()); |
90 |
| - relatedTags.get(celebrityId).add(topicId); |
91 |
| - } |
92 |
| - for (Integer tag : relatedTags.keySet()) { |
93 |
| - nonZeroTags.add(tag); |
94 |
| - } |
95 |
| - dictionary.close(); |
96 |
| - } catch (Exception e) { |
97 |
| - e.printStackTrace(); |
98 |
| - } |
99 |
| - } |
100 |
| - |
101 |
| - /** |
102 |
| - * @param randomTag The random tag number generator. |
103 |
| - * @param tag The tag identifier. |
104 |
| - * @return The related tag identifier. |
105 |
| - * @brief Gets a random related tag. |
106 |
| - */ |
107 |
| - public Integer getRandomRelated(Random randomTag, int tag) { |
108 |
| - int tagId = tag; |
109 |
| - if (relatedTags.get(tagId) == null) { |
110 |
| - tagId = nonZeroTags.get(randomTag.nextInt(nonZeroTags.size())); |
111 |
| - } |
112 |
| - return relatedTags.get(tagId).get(randomTag.nextInt(relatedTags.get(tagId).size())); |
113 |
| - } |
114 |
| - |
115 |
| - /** |
116 |
| - * @param randomTopic The random number generator used to select aditional popular tags |
117 |
| - * @param randomTag The random number generator used to select related tags. |
118 |
| - * @param popularTagId The popular tag identifier. |
119 |
| - * @param numTags The number of related tags to retrieve. |
120 |
| - * @return The set of related tags. |
121 |
| - * @brief Get a set of related tags. |
122 |
| - */ |
123 |
| - public TreeSet<Integer> getSetofTags(Random randomTopic, Random randomTag, int popularTagId, int numTags) { |
124 |
| - TreeSet<Integer> resultTags = new TreeSet<Integer>(); |
125 |
| - resultTags.add(popularTagId); |
126 |
| - while (resultTags.size() < numTags) { |
127 |
| - int tagId; |
128 |
| - tagId = popularTagId; |
129 |
| - |
130 |
| - if (relatedTags.get(tagId) == null) { |
131 |
| - tagId = nonZeroTags.get(randomTag.nextInt(nonZeroTags.size())); |
132 |
| - } |
133 |
| - |
134 |
| - // Doing binary search for finding the tag |
135 |
| - double randomDis = randomTag.nextDouble(); |
136 |
| - int lowerBound = 0; |
137 |
| - int upperBound = relatedTags.get(tagId).size(); |
138 |
| - int midPoint = (upperBound + lowerBound) / 2; |
139 |
| - |
140 |
| - while (upperBound > (lowerBound + 1)) { |
141 |
| - if (cumulative.get(tagId).get(midPoint) > randomDis) { |
142 |
| - upperBound = midPoint; |
143 |
| - } else { |
144 |
| - lowerBound = midPoint; |
145 |
| - } |
146 |
| - midPoint = (upperBound + lowerBound) / 2; |
147 |
| - } |
148 |
| - resultTags.add(relatedTags.get(tagId).get(midPoint)); |
149 |
| - } |
150 |
| - return resultTags; |
151 |
| - |
152 |
| - } |
153 |
| -} |
| 1 | +/* |
| 2 | + Copyright (c) 2013 LDBC |
| 3 | + Linked Data Benchmark Council (http://www.ldbcouncil.org) |
| 4 | + |
| 5 | + This file is part of ldbc_snb_datagen. |
| 6 | + |
| 7 | + ldbc_snb_datagen is free software: you can redistribute it and/or modify |
| 8 | + it under the terms of the GNU General Public License as published by |
| 9 | + the Free Software Foundation, either version 3 of the License, or |
| 10 | + (at your option) any later version. |
| 11 | + |
| 12 | + ldbc_snb_datagen is distributed in the hope that it will be useful, |
| 13 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | + GNU General Public License for more details. |
| 16 | + |
| 17 | + You should have received a copy of the GNU General Public License |
| 18 | + along with ldbc_snb_datagen. If not, see <http://www.gnu.org/licenses/>. |
| 19 | + |
| 20 | + Copyright (C) 2011 OpenLink Software <[email protected]> |
| 21 | + All Rights Reserved. |
| 22 | + |
| 23 | + This program is free software; you can redistribute it and/or modify |
| 24 | + it under the terms of the GNU General Public License as published by |
| 25 | + the Free Software Foundation; only Version 2 of the License dated |
| 26 | + June 1991. |
| 27 | + |
| 28 | + This program is distributed in the hope that it will be useful, |
| 29 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | + GNU General Public License for more details. |
| 32 | + |
| 33 | + You should have received a copy of the GNU General Public License |
| 34 | + along with this program; if not, write to the Free Software |
| 35 | + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/ |
| 36 | +package ldbc.snb.datagen.dictionary; |
| 37 | + |
| 38 | +import ldbc.snb.datagen.DatagenParams; |
| 39 | + |
| 40 | +import java.io.BufferedReader; |
| 41 | +import java.io.InputStreamReader; |
| 42 | +import java.util.ArrayList; |
| 43 | +import java.util.Random; |
| 44 | +import java.util.TreeMap; |
| 45 | +import java.util.TreeSet; |
| 46 | + |
| 47 | +public class TagMatrix { |
| 48 | + |
| 49 | + private static final String SEPARATOR = " "; |
| 50 | + |
| 51 | + private TreeMap<Integer, ArrayList<Integer>> relatedTags; |
| 52 | + /** |
| 53 | + * < @brief An array of related tags per tag. |
| 54 | + */ |
| 55 | + private TreeMap<Integer, ArrayList<Double>> cumulative; |
| 56 | + |
| 57 | + private ArrayList<Integer> nonZeroTags; |
| 58 | + |
| 59 | + /** |
| 60 | + * < @brief The list of tags. |
| 61 | + */ |
| 62 | + |
| 63 | + public TagMatrix() { |
| 64 | + cumulative = new TreeMap<Integer, ArrayList<Double>>(); |
| 65 | + relatedTags = new TreeMap<Integer, ArrayList<Integer>>(); |
| 66 | + nonZeroTags = new ArrayList<Integer>(); |
| 67 | + load(DatagenParams.tagMatrixFile); |
| 68 | + |
| 69 | + } |
| 70 | + |
| 71 | + /** |
| 72 | + * @param tagMatrixFileName The tag matrix file name. |
| 73 | + * @brief Loads the tag matrix from a file. |
| 74 | + */ |
| 75 | + private void load(String tagMatrixFileName) { |
| 76 | + try { |
| 77 | + BufferedReader dictionary = new BufferedReader(new InputStreamReader(getClass() |
| 78 | + .getResourceAsStream(tagMatrixFileName), "UTF-8")); |
| 79 | + String line; |
| 80 | + while ((line = dictionary.readLine()) != null) { |
| 81 | + String data[] = line.split(SEPARATOR); |
| 82 | + int celebrityId = Integer.parseInt(data[0]); |
| 83 | + int topicId = Integer.parseInt(data[1]); |
| 84 | + double cumuluative = Double.parseDouble(data[2]); |
| 85 | + ArrayList<Double> cum = cumulative.get(celebrityId); |
| 86 | + if (cum == null) cumulative.put(celebrityId, new ArrayList<Double>()); |
| 87 | + cumulative.get(celebrityId).add(cumuluative); |
| 88 | + ArrayList<Integer> related = relatedTags.get(celebrityId); |
| 89 | + if (related == null) relatedTags.put(celebrityId, new ArrayList<Integer>()); |
| 90 | + relatedTags.get(celebrityId).add(topicId); |
| 91 | + } |
| 92 | + for (Integer tag : relatedTags.keySet()) { |
| 93 | + nonZeroTags.add(tag); |
| 94 | + } |
| 95 | + dictionary.close(); |
| 96 | + } catch (Exception e) { |
| 97 | + e.printStackTrace(); |
| 98 | + } |
| 99 | + } |
| 100 | + |
| 101 | + /** |
| 102 | + * @param randomTag The random tag number generator. |
| 103 | + * @param tag The tag identifier. |
| 104 | + * @return The related tag identifier. |
| 105 | + * @brief Gets a random related tag. |
| 106 | + */ |
| 107 | + public Integer getRandomRelated(Random randomTag, int tag) { |
| 108 | + int tagId = tag; |
| 109 | + if (relatedTags.get(tagId) == null) { |
| 110 | + tagId = nonZeroTags.get(randomTag.nextInt(nonZeroTags.size())); |
| 111 | + } |
| 112 | + return relatedTags.get(tagId).get(randomTag.nextInt(relatedTags.get(tagId).size())); |
| 113 | + } |
| 114 | + |
| 115 | + /** |
| 116 | + * @param randomTopic The random number generator used to select aditional popular tags |
| 117 | + * @param randomTag The random number generator used to select related tags. |
| 118 | + * @param popularTagId The popular tag identifier. |
| 119 | + * @param numTags The number of related tags to retrieve. |
| 120 | + * @return The set of related tags. |
| 121 | + * @brief Get a set of related tags. |
| 122 | + */ |
| 123 | + public TreeSet<Integer> getSetofTags(Random randomTopic, Random randomTag, int popularTagId, int numTags) { |
| 124 | + TreeSet<Integer> resultTags = new TreeSet<Integer>(); |
| 125 | + resultTags.add(popularTagId); |
| 126 | + while (resultTags.size() < numTags) { |
| 127 | + int tagId; |
| 128 | + tagId = popularTagId; |
| 129 | + |
| 130 | + if (relatedTags.get(tagId) == null) { |
| 131 | + tagId = nonZeroTags.get(randomTag.nextInt(nonZeroTags.size())); |
| 132 | + } |
| 133 | + |
| 134 | + // Doing binary search for finding the tag |
| 135 | + double randomDis = randomTag.nextDouble(); |
| 136 | + int lowerBound = 0; |
| 137 | + int upperBound = relatedTags.get(tagId).size(); |
| 138 | + int midPoint = (upperBound + lowerBound) / 2; |
| 139 | + |
| 140 | + while (upperBound > (lowerBound + 1)) { |
| 141 | + if (cumulative.get(tagId).get(midPoint) > randomDis) { |
| 142 | + upperBound = midPoint; |
| 143 | + } else { |
| 144 | + lowerBound = midPoint; |
| 145 | + } |
| 146 | + midPoint = (upperBound + lowerBound) / 2; |
| 147 | + } |
| 148 | + resultTags.add(relatedTags.get(tagId).get(midPoint)); |
| 149 | + } |
| 150 | + return resultTags; |
| 151 | + |
| 152 | + } |
| 153 | +} |
0 commit comments