@@ -84,6 +84,8 @@ func scrape(texts, entities []string, tokenizer tokenize.Tokenizer) error {
 	entity := entities[0]
 	log.Printf("entity=%s", entity)
 
+	l := log.New(os.Stderr, entity+":", 0)
+
 	// Ignore articles without entity
 	temp := texts[:0]
 	for _, text := range texts {
@@ -92,9 +94,9 @@ func scrape(texts, entities []string, tokenizer tokenize.Tokenizer) error {
 		}
 	}
 	texts = temp
-	log.Printf("len(texts)=%d", len(texts))
+	l.Printf("len(texts)=%d", len(texts))
 
-	poS := tokenize.ADJ | tokenize.ADP | tokenize.ADV | tokenize.CONJ | tokenize.DET | tokenize.NOUN | tokenize.NUM | tokenize.PRON | tokenize.VERB
+	poS := tokenize.ADJ | tokenize.ADP | tokenize.ADV | tokenize.CONJ | tokenize.DET | tokenize.NOUN | tokenize.NUM | tokenize.PRON | tokenize.PRT | tokenize.VERB
 	meanN, err := assocentity.MeanN(
 		context.Background(),
 		tokenizer,
@@ -103,18 +105,18 @@ func scrape(texts, entities []string, tokenizer tokenize.Tokenizer) error {
 		entities,
 	)
 	if err != nil {
-		log.Fatal(err)
+		l.Fatal(err)
 	}
 
-	log.Printf("len(meanN)=%d", len(meanN))
+	l.Printf("len(meanN)=%d", len(meanN))
 
 	if len(meanN) == 0 {
-		log.Print("no meanN found, exiting")
+		l.Print("no meanN found, exiting")
 		os.Exit(0)
 	}
 
 	// Convert to slice to make it sortable
-	log.Println("convert to slice")
+	l.Println("convert to slice")
 	type meanNVal struct {
 		dist float64
 		tok  tokenize.Token
@@ -129,7 +131,7 @@ func scrape(texts, entities []string, tokenizer tokenize.Tokenizer) error {
 	}
 
 	// Sort by closest distance
-	log.Println("sort by pos and distance")
+	l.Println("sort by pos and distance")
 	sort.Slice(meanNVals, func(i, j int) bool {
 		if meanNVals[i].tok.PoS != meanNVals[j].tok.PoS {
 			return meanNVals[i].tok.PoS < meanNVals[j].tok.PoS
@@ -138,7 +140,7 @@ func scrape(texts, entities []string, tokenizer tokenize.Tokenizer) error {
 	})
 
 	// Top 10 per pos
-	log.Println("limit top 10")
+	l.Println("limit top 10")
 	type topMeanNVal struct {
 		Dist float64 `json:"distance"`
 		Pos  string  `json:"pos"`
@@ -160,18 +162,18 @@ func scrape(texts, entities []string, tokenizer tokenize.Tokenizer) error {
 
 		poSCounter[meanNVal.tok.PoS] += 1
 	}
-	log.Printf("len(topMeanNVals)=%d", len(topMeanNVals))
+	l.Printf("len(topMeanNVals)=%d", len(topMeanNVals))
 
 	// Write top 10 to disk
-	log.Println("write to disk")
+	l.Println("write to disk")
 	file, err := json.MarshalIndent(&topMeanNVals, "", " ")
 	if err != nil {
-		log.Fatal(err)
+		l.Fatal(err)
 	}
 	name := url.QueryEscape(strings.ToLower(entity))
 	path := filepath.Join("web/public", name+".json")
 	if err := os.WriteFile(path, file, 0600); err != nil {
-		log.Fatal(err)
+		l.Fatal(err)
	}
 
 	return nil
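
Note on the new logger: log.New(os.Stderr, entity+":", 0) returns a *log.Logger that prefixes every line with the entity under processing, and the flag value 0 disables the standard date/time stamp, so the output of each run is attributable to the entity being scraped. A minimal, self-contained sketch of the resulting behavior (the entity name below is a hypothetical placeholder):

	package main

	import (
		"log"
		"os"
	)

	func main() {
		entity := "Alice" // hypothetical example entity
		// Same construction as in the diff: prefix every line with the
		// entity, flag 0 drops the default date/time prefix.
		l := log.New(os.Stderr, entity+":", 0)
		l.Printf("len(texts)=%d", 42)
		// stderr: Alice:len(texts)=42
	}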
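
Note on the part-of-speech mask: the poS change ORs tokenize.PRT (particles) into the filter alongside the existing classes, so particles are now included in the mean-distance analysis. Assuming the tokenize PoS constants are single-bit flags, as their composition with | suggests, a sketch of how such a mask is built and tested (the constants below are hypothetical stand-ins, not the package's actual values):

	package main

	import "fmt"

	// Hypothetical single-bit PoS flags mirroring the pattern in the diff.
	const (
		NOUN uint = 1 << iota
		VERB
		PRT
	)

	func main() {
		poS := NOUN | VERB | PRT  // compose the filter mask
		fmt.Println(poS&PRT != 0) // true: particles now pass the filter
	}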