Skip to content

Commit 300dfb5

Browse files
authored
Convert examples to 1-liners for the tool. Add descriptions/senses to the uploader. Gitignore for java/python stuff. (#109)
1 parent 51b29a8 commit 300dfb5

File tree

3 files changed

+16
-269
lines changed

3 files changed

+16
-269
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
inflection/tools/dictionary-parser/bin/*
2+
fst/__pycache__/*
3+
.vscode/settings.json

data/tools/new_lexeme_sample.json

Lines changed: 3 additions & 269 deletions
Original file line numberDiff line numberDiff line change
@@ -1,271 +1,5 @@
11
[
2-
{
3-
"lemma": "дворац",
4-
"grammaticalCategory": "noun",
5-
"language": "sr",
6-
"grammaticalGender": "masculine",
7-
"forms": [
8-
{
9-
"value": "дворац",
10-
"grammaticalFeatures": [
11-
"singular",
12-
"nominative"
13-
]
14-
},
15-
{
16-
"value": "двораца",
17-
"grammaticalFeatures": [
18-
"singular",
19-
"genitive"
20-
]
21-
},
22-
{
23-
"value": "дворцу",
24-
"grammaticalFeatures": [
25-
"singular",
26-
"dative"
27-
]
28-
},
29-
{
30-
"value": "дворац",
31-
"grammaticalFeatures": [
32-
"singular",
33-
"accusative"
34-
]
35-
},
36-
{
37-
"value": "дворцу",
38-
"grammaticalFeatures": [
39-
"singular",
40-
"vocative"
41-
]
42-
},
43-
{
44-
"value": "дворцом",
45-
"grammaticalFeatures": [
46-
"singular",
47-
"instrumental"
48-
]
49-
},
50-
{
51-
"value": "дворцу",
52-
"grammaticalFeatures": [
53-
"singular",
54-
"locative"
55-
]
56-
},
57-
{
58-
"value": "дворци",
59-
"grammaticalFeatures": [
60-
"plural",
61-
"nominative"
62-
]
63-
},
64-
{
65-
"value": "двораца",
66-
"grammaticalFeatures": [
67-
"plural",
68-
"genitive"
69-
]
70-
},
71-
{
72-
"value": "дворцима",
73-
"grammaticalFeatures": [
74-
"plural",
75-
"dative"
76-
]
77-
},
78-
{
79-
"value": "дворце",
80-
"grammaticalFeatures": [
81-
"plural",
82-
"accusative"
83-
]
84-
},
85-
{
86-
"value": "дворци",
87-
"grammaticalFeatures": [
88-
"plural",
89-
"vocative"
90-
]
91-
},
92-
{
93-
"value": "дворцима",
94-
"grammaticalFeatures": [
95-
"plural",
96-
"instrumental"
97-
]
98-
},
99-
{
100-
"value": "дворцима",
101-
"grammaticalFeatures": [
102-
"plural",
103-
"locative"
104-
]
105-
}
106-
]
107-
},
108-
{
109-
"lemma": "април",
110-
"grammaticalCategory": "noun",
111-
"language": "sr",
112-
"grammaticalGender": "masculine",
113-
"forms": [
114-
{
115-
"value": "април",
116-
"grammaticalFeatures": [
117-
"singular",
118-
"nominative"
119-
]
120-
},
121-
{
122-
"value": "априла",
123-
"grammaticalFeatures": [
124-
"singular",
125-
"genitive"
126-
]
127-
},
128-
{
129-
"value": "априлу",
130-
"grammaticalFeatures": [
131-
"singular",
132-
"dative"
133-
]
134-
},
135-
{
136-
"value": "април",
137-
"grammaticalFeatures": [
138-
"singular",
139-
"accusative"
140-
]
141-
},
142-
{
143-
"value": "априлу",
144-
"grammaticalFeatures": [
145-
"singular",
146-
"vocative"
147-
]
148-
},
149-
{
150-
"value": "априлом",
151-
"grammaticalFeatures": [
152-
"singular",
153-
"instrumental"
154-
]
155-
},
156-
{
157-
"value": "априлу",
158-
"grammaticalFeatures": [
159-
"singular",
160-
"locative"
161-
]
162-
},
163-
{
164-
"value": "априли",
165-
"grammaticalFeatures": [
166-
"plural",
167-
"nominative"
168-
]
169-
},
170-
{
171-
"value": "априла",
172-
"grammaticalFeatures": [
173-
"plural",
174-
"genitive"
175-
]
176-
},
177-
{
178-
"value": "априлима",
179-
"grammaticalFeatures": [
180-
"plural",
181-
"dative"
182-
]
183-
},
184-
{
185-
"value": "априле",
186-
"grammaticalFeatures": [
187-
"plural",
188-
"accusative"
189-
]
190-
},
191-
{
192-
"value": "априли",
193-
"grammaticalFeatures": [
194-
"plural",
195-
"vocative"
196-
]
197-
},
198-
{
199-
"value": "априлима",
200-
"grammaticalFeatures": [
201-
"plural",
202-
"instrumental"
203-
]
204-
},
205-
{
206-
"value": "априлима",
207-
"grammaticalFeatures": [
208-
"plural",
209-
"locative"
210-
]
211-
}
212-
]
213-
},
214-
{
215-
"lemma": "коњ",
216-
"grammaticalCategory": "noun",
217-
"language": "sr",
218-
"grammaticalGender": "masculine",
219-
"forms": [
220-
{
221-
"value": "коњ",
222-
"grammaticalFeatures": [
223-
"singular",
224-
"nominative"
225-
]
226-
},
227-
{
228-
"value": "коњ",
229-
"grammaticalFeatures": [
230-
"singular",
231-
"genitive"
232-
]
233-
},
234-
{
235-
"value": "коњ",
236-
"grammaticalFeatures": [
237-
"singular",
238-
"dative"
239-
]
240-
},
241-
{
242-
"value": "коњ",
243-
"grammaticalFeatures": [
244-
"singular",
245-
"accusative"
246-
]
247-
},
248-
{
249-
"value": "коњ",
250-
"grammaticalFeatures": [
251-
"singular",
252-
"vocative"
253-
]
254-
},
255-
{
256-
"value": "коњ",
257-
"grammaticalFeatures": [
258-
"singular",
259-
"instrumental"
260-
]
261-
},
262-
{
263-
"value": "коњ",
264-
"grammaticalFeatures": [
265-
"singular",
266-
"locative"
267-
]
268-
}
269-
]
270-
}
2+
{"lemma":"дворац","grammaticalCategory":"noun","language":"sr","grammaticalGender":"masculine","forms":[{"value":"дворац","grammaticalFeatures":["singular","nominative"]},{"value":"двораца","grammaticalFeatures":["singular","genitive"]},{"value":"дворцу","grammaticalFeatures":["singular","dative"]},{"value":"дворац","grammaticalFeatures":["singular","accusative"]},{"value":"дворцу","grammaticalFeatures":["singular","vocative"]},{"value":"дворцом","grammaticalFeatures":["singular","instrumental"]},{"value":"дворцу","grammaticalFeatures":["singular","locative"]},{"value":"дворци","grammaticalFeatures":["plural","nominative"]},{"value":"двораца","grammaticalFeatures":["plural","genitive"]},{"value":"дворцима","grammaticalFeatures":["plural","dative"]},{"value":"дворце","grammaticalFeatures":["plural","accusative"]},{"value":"дворци","grammaticalFeatures":["plural","vocative"]},{"value":"дворцима","grammaticalFeatures":["plural","instrumental"]},{"value":"дворцима","grammaticalFeatures":["plural","locative"]}],"descriptions":[{"language":"sr","value":"Средњовековно пребивалиште владара, поседује и одбрамбене особине."},{"language":"en","value":"Castle"}]},
3+
{"lemma":"април","grammaticalCategory":"noun","language":"sr","grammaticalGender":"masculine","forms":[{"value":"април","grammaticalFeatures":["singular","nominative"]},{"value":"априла","grammaticalFeatures":["singular","genitive"]},{"value":"априлу","grammaticalFeatures":["singular","dative"]},{"value":"април","grammaticalFeatures":["singular","accusative"]},{"value":"априлу","grammaticalFeatures":["singular","vocative"]},{"value":"априлом","grammaticalFeatures":["singular","instrumental"]},{"value":"априлу","grammaticalFeatures":["singular","locative"]},{"value":"априли","grammaticalFeatures":["plural","nominative"]},{"value":"априла","grammaticalFeatures":["plural","genitive"]},{"value":"априлима","grammaticalFeatures":["plural","dative"]},{"value":"априле","grammaticalFeatures":["plural","accusative"]},{"value":"априли","grammaticalFeatures":["plural","vocative"]},{"value":"априлима","grammaticalFeatures":["plural","instrumental"]},{"value":"априлима","grammaticalFeatures":["plural","locative"]}],"descriptions":[{"language":"sr","value":"Четврти месец у години"},{"language":"en","value":"April"}]},
4+
{"lemma":"коњ","grammaticalCategory":"noun","language":"sr","grammaticalGender":"masculine","forms":[{"value":"коњ","grammaticalFeatures":["singular","nominative"]},{"value":"коњ","grammaticalFeatures":["singular","genitive"]},{"value":"коњ","grammaticalFeatures":["singular","dative"]},{"value":"коњ","grammaticalFeatures":["singular","accusative"]},{"value":"коњ","grammaticalFeatures":["singular","vocative"]},{"value":"коњ","grammaticalFeatures":["singular","instrumental"]},{"value":"коњ","grammaticalFeatures":["singular","locative"]}],"descriptions":[{"language":"sr","value":"Четвороножна животиња, која се често користи за јахање или пренос терета."},{"language":"en","value":"Horse"}]}
2715
]

data/tools/wikidata_upload.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,16 @@ def build_tfsl_lexemes(new_lexemes):
190190
features.append(WIKI_TYPES[feature])
191191
lexeme += tfsl.LexemeForm(form["value"] @ wiki_lang, features)
192192

193+
if "descriptions" in new_lexeme:
194+
glosses = []
195+
for gloss in new_lexeme["descriptions"]:
196+
glosses.append(
197+
gloss["value"] @ tfsl.languages.get_first_lang(gloss["language"])
198+
)
199+
lexeme += tfsl.LexemeSense(glosses)
200+
else:
201+
print(f'WARNING: Description missing for {new_lexeme["lemma"]}.')
202+
193203
tfsl_lexemes.append(lexeme)
194204

195205
return tfsl_lexemes

0 commit comments

Comments
 (0)