Skip to content

Commit 5194268

Browse files
committed
Correlated Name parameters for Query 1
1 parent 88cee3f commit 5194268

File tree

4 files changed

+55
-18
lines changed

4 files changed

+55
-18
lines changed

paramgenerator/generateparams.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ def findNameParameters(names, amount = 100):
3232
while counts[mid] - counts[i] < 0.1 * counts[mid]:
3333
res.extend([name for name in hist[counts[i]]])
3434
i -= 1
35-
3635
return res
3736

37+
38+
3839
class CSVSerializer:
3940
def __init__(self):
4041
self.handlers = []
@@ -141,7 +142,7 @@ def main(argv=None):
141142
friendsFiles.append(indir+file)
142143

143144
# read precomputed counts from files
144-
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, ts) = readfactors.load(factorFiles, friendsFiles)
145+
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts) = readfactors.load(factorFiles, friendsFiles)
145146

146147
# find person parameters
147148
print "find parameter bindings for Persons"
@@ -233,12 +234,17 @@ def main(argv=None):
233234
HS.append((HS0, HS1))
234235

235236
# Query 1 takes first name as a parameter
236-
nameParams = findNameParameters(nameFactors)# discoverparams.generate(nameFactors)
237-
# if there are fewer first names than person parameters, repeat some of the names
238-
if len(nameParams) < len(selectedPersonParams[2]):
239-
oldlen = len(nameParams)
240-
newlen = len(selectedPersonParams[2])
241-
nameParams.extend([nameParams[random.randint(0, oldlen-1)] for j in range(newlen-oldlen)])
237+
#nameParams = findNameParameters(nameFactors)# discoverparams.generate(nameFactors)
238+
## if there are fewer first names than person parameters, repeat some of the names
239+
#if len(nameParams) < len(selectedPersonParams[2]):
240+
# oldlen = len(nameParams)
241+
# newlen = len(selectedPersonParams[2])
242+
# nameParams.extend([nameParams[random.randint(0, oldlen-1)] for j in range(newlen-oldlen)])
243+
nameParams = []
244+
for person in selectedPersonParams[1]:
245+
nameParams.append(givenNames.getValue(person))
246+
247+
print nameParams
242248

243249
# serialize all the parameters as CSV
244250
csvWriters = {}

paramgenerator/readfactors.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,23 @@ def getValue(self, person, factor):
4141
def addValue(self, person, factor, value):
4242
self.values[person].addValue(factor, value)
4343

44+
class NameParameter:
    """Per-person lookup table of name strings.

    ``values`` maps a person key to the name stored for that person.
    Persons passed to the constructor start out with an empty string
    until ``setValue`` assigns them a name.
    """

    def __init__(self, persons=None):
        """Create the table, pre-registering ``persons`` with empty names.

        ``persons`` defaults to ``None`` rather than ``[]`` so that no
        mutable default object is shared between calls.
        """
        self.values = {}
        if persons is not None:
            for p in persons:
                self.values[p] = ""

    def setValue(self, person, value):
        """Store ``value`` as the name for ``person``, replacing any previous one."""
        self.values[person] = value

    def getValue(self, person):
        """Return the name stored for ``person``.

        Raises ``KeyError`` if ``person`` was never registered.
        """
        return self.values[person]
4455

4556
def load(factorFiles, friendFiles):
4657
print "loading input for parameter generation"
4758
results = Factors()
4859
countries = Factors()
49-
60+
givenNames = NameParameter()
5061

5162
tagClasses = {}
5263
tags = {}
@@ -62,13 +73,14 @@ def load(factorFiles, friendFiles):
6273
person = int(line[0])
6374
if not results.existParam(person):
6475
results.addNewParam(person)
65-
results.addValue(person, "f", int(line[1]))
66-
results.addValue(person, "p", int(line[2]))
67-
results.addValue(person, "pl", int(line[3]))
68-
results.addValue(person, "pt", int(line[4]))
69-
results.addValue(person, "g", int(line[5]))
70-
results.addValue(person, "w", int(line[6]))
71-
results.addValue(person, "pr", int(line[7]))
76+
givenNames.setValue(person, line[1])
77+
results.addValue(person, "f", int(line[2]))
78+
results.addValue(person, "p", int(line[3]))
79+
results.addValue(person, "pl", int(line[4]))
80+
results.addValue(person, "pt", int(line[5]))
81+
results.addValue(person, "g", int(line[6]))
82+
results.addValue(person, "w", int(line[7]))
83+
results.addValue(person, "pr", int(line[8]))
7284

7385
countryCount = int(f.readline())
7486
for i in range(countryCount):
@@ -120,7 +132,7 @@ def load(factorFiles, friendFiles):
120132

121133
loadFriends(friendFiles, results)
122134

123-
return (results, countries, tags.items(), tagClasses.items(), names.items(), timestamp)
135+
return (results, countries, tags.items(), tagClasses.items(), names.items(), givenNames,timestamp)
124136

125137
def loadFriends(friendFiles, factors):
126138

src/main/java/ldbc/socialnet/dbgen/dictionary/NamesDictionary.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,5 +219,17 @@ public String getRandomGivenName(Random random, int locationId, boolean isMale,
219219

220220
return name;
221221
}
222+
223+
/**
224+
* return a given name which is the median of topN for a given location/gender/year
225+
* we use it for parameter generation
226+
*/
227+
public String getMedianGivenName(int locationId, boolean isMale, int birthYear){
228+
int period = 0;
229+
Vector<HashMap<Integer, Vector<String>>> target = (isMale) ? givenNamesByLocationsMale : givenNamesByLocationsFemale;
230+
int size = target.get(period).get(locationId).size();
231+
String name = target.get(period).get(locationId).get(size/2);
232+
return name;
233+
}
222234
}
223235

src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ public enum OrganisationType {
252252
private HashMap<Integer, Integer> tagClassCount;
253253
private HashMap<String, Integer> firstNameCount;
254254
private HashMap<Integer, Integer> tagNameCount;
255+
private HashMap<Long, String> medianFirstName;
255256
// For blocking
256257
private static final int reducerShift[] = { 26, 8, 1 };
257258

@@ -429,6 +430,7 @@ public ScalableGenerator(int threadId, Configuration conf ){
429430
this.postsPerCountry = new HashMap<Integer, Integer>();
430431
this.tagClassCount = new HashMap<Integer, Integer>();
431432
this.firstNameCount = new HashMap<String, Integer>();
433+
this.medianFirstName = new HashMap<Long, String>();
432434
this.tagNameCount = new HashMap<Integer, Integer>();
433435
if (threadId != -1){
434436
outUserProfile = "mr" + threadId + "_" + outUserProfileName;
@@ -793,6 +795,9 @@ public void generateUserActivity( ReducedUserProfile userProfile, Reducer<MapRed
793795
dataExporter.export(userInfo);
794796
int nameCount = firstNameCount.containsKey(extraInfo.getFirstName())? firstNameCount.get(extraInfo.getFirstName()):0;
795797
firstNameCount.put(extraInfo.getFirstName(), nameCount+1);
798+
String medianName = namesDictionary.getMedianGivenName(userProfile.getLocationId(),
799+
userProfile.getGender()==1, dateTimeGenerator.getBirthYear(userProfile.getBirthDay()));
800+
medianFirstName.put(userProfile.getAccountId(), medianName);
796801
long init = System.currentTimeMillis();
797802
if(conf.getBoolean("activity",true)) {
798803
Group wall = generateWall(userInfo);
@@ -1570,7 +1575,9 @@ private void writeFactorTable(){
15701575
// correct the group counts
15711576
//count.numberOfGroups += count.numberOfFriends;
15721577
StringBuffer strbuf = new StringBuffer();
1573-
strbuf.append(c.getKey()); strbuf.append(",");
1578+
strbuf.append(c.getKey()); strbuf.append(",");
1579+
String name = medianFirstName.get(c.getKey());
1580+
strbuf.append(name); strbuf.append(",");
15741581
strbuf.append(count.numberOfFriends); strbuf.append(",");
15751582
strbuf.append(count.numberOfPosts); strbuf.append(",");
15761583
strbuf.append(count.numberOfLikes); strbuf.append(",");

0 commit comments

Comments
 (0)