|
8 | 8 | from collections import defaultdict |
9 | 9 | from . import sparql_bot |
10 | 10 |
|
# Arabic display labels for lexical categories, keyed by category id.
# Consulted as a fallback when a row does not carry its own label.
# NOTE(review): the keys look like Wikidata Q-ids — confirm against the queries.
categoryLabels = {
    "Q24905": "فعل",  # verb
    "Q111029": "جذر",  # root
    "Q1084": "اسم",  # noun
    "Q34698": "صفة",  # adjective
    "Q147276": "اسم علم",  # proper noun
    "Q4833830": "حرف جر",  # preposition
    "Q9788": "حرف",  # letter / particle
    "Q36484": "حرف ربط",  # conjunction
    "Q468801": "ضمير شخصي",  # personal pronoun
    "Q63116": "اسم عدد",  # numeral
}
| 23 | +# --- |
| 24 | + |
11 | 25 |
|
12 | 26 | def split_data_by_category_list(data): |
13 | 27 | # --- |
@@ -42,7 +56,7 @@ def split_data_by_category_dict(data): |
42 | 56 | if category not in split_by_category: |
43 | 57 | split_by_category[category] = { |
44 | 58 | 'category': category, |
45 | | - 'categoryLabel': item['categoryLabel'], |
| 59 | + 'categoryLabel': item.get('categoryLabel') or categoryLabels.get(category, ""), |
46 | 60 | 'members': {} |
47 | 61 | } |
48 | 62 | # --- |
@@ -125,23 +139,34 @@ def render_duplicate_by_category(limit): |
125 | 139 | return new, sparql_exec_time |
126 | 140 |
|
127 | 141 |
|
def render_duplicate(limit=0):
    """Group the duplicate-lemma rows from ``sparql_bot.find_duplicates()``.

    Each row is expected to look like::

        {"lemma_fixed": "تذكير", "category": "Q1084",
         "items": "L1457168, L1457168", "lemmas": "تذكير, تَذْكِير"}

    Args:
        limit: Not read by this function; it only appears in the signature.

    Returns:
        A ``(grouped, sparql_exec_time)`` tuple. ``grouped`` maps each
        ``lemma_fixed`` value to a dict with the keys ``lemma``,
        ``category``, ``categoryLabel`` (looked up in ``categoryLabels``,
        ``""`` when unknown) and ``members``, a list of
        ``{"lemma": ..., "item": ...}`` dicts.
    """
    # The third value returned by find_duplicates() is not inspected here.
    rows, sparql_exec_time, _err = sparql_bot.find_duplicates()

    grouped = {}
    for row in rows:
        key = row['lemma_fixed']
        category = row['category']
        # The first row seen for a lemma fixes its category and label;
        # later rows with the same lemma only contribute members.
        entry = grouped.setdefault(key, {
            "lemma": key,
            "category": category,
            "categoryLabel": categoryLabels.get(category, ""),
            'members': [],
        })
        # "lemmas" and "items" are parallel comma-separated strings; zip()
        # pairs them positionally and stops at the shorter of the two.
        pairs = zip(row['lemmas'].split(","), row['items'].split(","))
        entry['members'].extend([
            {"lemma": lemma.strip(), "item": item.strip()}
            for lemma, item in pairs
        ])

    return grouped, sparql_exec_time
0 commit comments