Skip to content

Commit 733e52b

Browse files
committed
.
1 parent 3f7d4e3 commit 733e52b

File tree

7 files changed

+219
-32
lines changed

7 files changed

+219
-32
lines changed

python/src/app.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pyx import logs_bot_new
99
from pyx.wd_data_bots import wd_data_P11038
1010
from pyx.sparql_bots import sparql_bot
11-
from pyx.sparql_bots.render import render_duplicate_by_category, render_sparql_P11038_grouped
11+
from pyx.sparql_bots.render import render_duplicate_by_category, render_duplicate, render_sparql_P11038_grouped
1212
from pyx.bots.not_in_db_bot import get_not_in_db
1313

1414

@@ -55,6 +55,14 @@ def jsonify(data : dict, **kwargs) -> str:
5555
return Response(response=response_json, content_type="application/json; charset=utf-8")
5656

5757

58+
@app.route("/api/duplicate2", methods=["GET"])
def duplicate2_api():
    """Serve the result of ``render_duplicate`` as a JSON response.

    Query parameters:
        limit: integer row limit handed to ``render_duplicate``
            (default 100, the same default ``sparql_bot.find_duplicates``
            declares for its ``LIMIT`` parameter).

    Returns:
        The response built by ``jsonify`` from the rows, with
        ``sparql_exec_time`` and ``len_result`` passed as keyword arguments.
    """
    # ---
    # render_duplicate() declares `limit` as a required parameter, so it has
    # to be supplied here; calling it with no argument raises TypeError.
    limit = request.args.get('limit', 100, type=int)
    # ---
    data, sparql_exec_time = render_duplicate(limit)
    # ---
    return jsonify(data, sparql_exec_time=sparql_exec_time, len_result=len(data))
64+
65+
5866
@app.route("/api/wd_data_count", methods=["GET"])
5967
def wd_data_api_count():
6068
# ---
@@ -154,10 +162,28 @@ def P11038_wd():
154162
)
155163

156164

165+
@app.route("/duplicate2.html", methods=["GET"])
def duplicate2():
    """Render ``duplicate2.html`` with the output of ``render_duplicate``.

    Query parameters:
        limit: integer passed to ``render_duplicate`` (default 10000).

    The template receives the rows as ``result`` and a ``time_tab`` dict
    holding ``sparql_exec_time``.
    """
    # ---
    requested_limit = request.args.get('limit', 10000, type=int)
    # ---
    rows, elapsed = render_duplicate(requested_limit)
    # ---
    return render_template(
        "duplicate2.html",
        result=rows,
        time_tab={"sparql_exec_time": elapsed},
    )
181+
182+
157183
@app.route("/duplicate.html", methods=["GET"])
158184
def duplicate():
159185
# ---
160-
limit = request.args.get('limit', 10000, type=int)
186+
limit = request.args.get('limit', 50000, type=int)
161187
# ---
162188
data, sparql_exec_time = render_duplicate_by_category(limit)
163189
# ---

python/src/pyx/sparql_bots/render.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def render_sparql_P11038_grouped(limit=0, group_it=False):
8484
return tab_P11038, sparql_exec_time
8585

8686

87-
def find_duplicates(members):
87+
def duplicates_work(members):
8888
# ---
8989
duplicates = defaultdict(list)
9090
# ---
@@ -115,11 +115,33 @@ def render_duplicate_by_category(limit):
115115
# ---
116116
for cat, tab in split_by_category.items():
117117
# ---
118-
members = find_duplicates(tab["members"])
118+
members = duplicates_work(tab["members"])
119119
# ---
120120
if members:
121121
tab["lemmas"] = members
122122
# ---
123123
new[cat] = tab
124124
# ---
125125
return new, sparql_exec_time
126+
127+
128+
def render_duplicate(limit=100):
    """Fetch duplicated-lemma rows through ``sparql_bot.find_duplicates``.

    Args:
        limit: row limit forwarded to ``find_duplicates`` as ``LIMIT``.
            Defaults to 100 (the default ``find_duplicates`` itself
            declares) so a call without arguments keeps working.

    Returns:
        A ``(result, sparql_exec_time)`` tuple taken from
        ``sparql_bot.find_duplicates``. The third value it returns (the
        error) is not propagated to the caller.
    """
    # ---
    # Forward the caller's limit; previously it was accepted but ignored, so
    # the query always ran with find_duplicates' own default.
    result, sparql_exec_time, _err = sparql_bot.find_duplicates(LIMIT=limit)
    # ---
    # NOTE(review): rows are returned as-is, without the per-category grouping
    # that render_duplicate_by_category applies (split by category, then
    # duplicates_work on each category's members, stored under "lemmas").
    # TODO: confirm whether the consuming template expects that grouped shape
    # and, if so, add the grouping step here.
    # ---
    return result, sparql_exec_time

python/src/pyx/sparql_bots/sparql_bot.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
endpoint_url = 'https://query.wikidata.org/sparql'
1919

2020

21-
def safe_sparql_query(query):
21+
def safe_sparql_query(query, time_out=10):
2222

2323
if query in sparql_cache and "nocahe" not in sys.argv:
2424
err_bot.log_error("SPARQL Cache Hit", f"Query retrieved from cache: {query}")
@@ -31,7 +31,7 @@ def safe_sparql_query(query):
3131
sparql.setQuery(query)
3232
# ---
3333
sparql.setReturnFormat(JSON)
34-
sparql.setTimeout(10)
34+
sparql.setTimeout(time_out)
3535
# ---
3636
data = sparql.query().convert()
3737
# ---
@@ -61,11 +61,11 @@ def safe_sparql_query(query):
6161
return {}, "SPARQL Unknown Error"
6262

6363

64-
def get_results(query):
64+
def get_results(query, time_out=10, get_err=False):
6565
# ---
6666
now = time.time()
6767
# ---
68-
data, err = safe_sparql_query(query)
68+
data, err = safe_sparql_query(query, time_out=time_out)
6969
# ---
7070
# تنسيق النتائج
7171
result = []
@@ -95,6 +95,9 @@ def get_results(query):
9595
# ---
9696
print(f"SPARQL sparql_exec_time: {sparql_exec_time}")
9797
# ---
98+
if get_err:
99+
return result, sparql_exec_time, err
100+
# ---
98101
return result, sparql_exec_time
99102

100103

@@ -236,23 +239,26 @@ def count_arabic_with_P11038():
236239
return count, sparql_exec_time
237240

238241

239-
def find_duplicates(LIMIT=100):
    """Query for lexemes that share a lemma once a set of characters is removed.

    The SPARQL query selects lexemes whose language is ``wd:Q13955``, strips
    every character in the ranges U+064B-U+065F and U+066A-U+06EF from the
    lemma, groups by the stripped lemma and the lexical category, and keeps
    only groups containing more than one lexeme. For each group it returns
    the comma-separated entity ids (the part of the IRI after ``/entity/``)
    and the comma-separated original lemmas.

    Args:
        LIMIT: maximum number of groups to request. A value of 0 or less
            sends the query without a ``LIMIT`` clause. The value is
            converted with ``int()`` before it is placed in the query text.

    Returns:
        The ``(data, sparql_exec_time, err)`` tuple produced by
        ``get_results(..., get_err=True)``.

    Raises:
        TypeError, ValueError: if ``LIMIT`` cannot be converted to an int.
    """
    sparql_query = """
        SELECT ?lemma_fixed ?category
            (GROUP_CONCAT(strafter(str(?1_item),"/entity/"); separator=", ") AS ?items)
            (GROUP_CONCAT(?lemma; separator=", ") AS ?lemmas)
        WHERE {
            #service <https://qlever.cs.uni-freiburg.de/api/wikidata> {
            ?1_item dct:language wd:Q13955;
                wikibase:lemma ?lemma;
                wikibase:lexicalCategory ?category.
            BIND(REPLACE(STR(?lemma), "[\u064B-\u065F\u066A-\u06EF]", "") AS ?lemma_fixed)
            #}
        }
        GROUP BY ?lemma_fixed ?category
        HAVING(COUNT(?1_item) > 1)
    """
    # ---
    # The limit becomes part of the query text, so force it to an integer
    # first: nothing but a number can ever be appended to the query.
    limit = int(LIMIT)
    if limit > 0:
        sparql_query += f" LIMIT {limit}"
    # ---
    # The grouped query runs with time_out=35 instead of get_results' default of 10.
    data, sparql_exec_time, err = get_results(sparql_query, time_out=35, get_err=True)
    # ---
    return data, sparql_exec_time, err

python/src/static/js/lexemes/compare.js

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ function convertDataNew(data) {
6767
}
6868

6969
async function get_wdresult(qids) {
70+
// ---
7071
const sparqlQuery = qids_data_query(qids);
7172
// ---
7273
add_sparql_url(sparqlQuery);
@@ -243,11 +244,7 @@ async function render_tables_container(data) {
243244
});
244245
}
245246

246-
async function load_compare() {
247-
// ---
248-
let qids = get_param_from_window_location("qids", "");
249-
// ---
250-
qids = qids.split(",");
247+
async function load_compare(qids) {
251248
// ---
252249
let data = await get_qids_data(qids);
253250
// ---

python/src/templates/compare.php

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,8 @@
2323
<span class="text-2xl font-bold text-center h2">
2424
مقارنة المفردات:
2525
</span>
26-
({%for qid in qids %}
27-
{%if not loop.first%} - {%endif%}
28-
<a href="https://www.wikidata.org/entity/{{qid}}" target="_blank">
29-
<span class="fs-5">
30-
<span find-label="{{qid}}" find-label-both="true">{{qid}}</span>
31-
</span>
32-
</a>
33-
{%endfor%})
26+
<span id="qids_span">
27+
</span>
3428
</div>
3529
<div class="col-md-2 col-sm-2 mb-2 mb-md-0">
3630
<a href="#" target="_blank" id="sparql_url" class="btn btn-outline-primary disabled" role="button">
@@ -60,7 +54,32 @@
6054
<script src="/static/js/toggleView_compare.js"></script>
6155
6256
<script>
63-
document.addEventListener('DOMContentLoaded', () => load_compare());
57+
/**
 * Page entry point: reads the ids from the "qids" URL parameter, renders them
 * as links inside #qids_span, then hands the id list to load_compare().
 *
 * The ids come straight from the URL, so they are never written through
 * innerHTML; every node is created with DOM APIs and filled via textContent /
 * setAttribute, which keeps markup in the parameter from being interpreted.
 */
async function start() {
    // ---
    const raw = get_param_from_window_location("qids", "") || "";
    // ---
    // Split on commas and drop blank entries (e.g. "a,,b" or a trailing comma).
    const qids = raw
        ? raw.split(",").map((qid) => qid.trim()).filter(Boolean)
        : [];
    // ---
    const container = document.getElementById("qids_span");
    if (container) {
        // Resulting text: "(id1 - id2 - ...)", or "()" when no ids were given.
        container.textContent = "(";
        qids.forEach((qid, index) => {
            if (index > 0) {
                container.appendChild(document.createTextNode(" - "));
            }
            const link = document.createElement("a");
            link.href = "https://www.wikidata.org/entity/" + encodeURIComponent(qid);
            link.target = "_blank";

            const wrapper = document.createElement("span");
            wrapper.className = "fs-5";

            const label = document.createElement("span");
            label.setAttribute("find-label", qid);
            label.setAttribute("find-label-both", "true");
            label.textContent = qid;

            wrapper.appendChild(label);
            link.appendChild(wrapper);
            container.appendChild(link);
        });
        container.appendChild(document.createTextNode(")"));
    }

    // --- call the loading function ---
    await load_compare(qids);
}
81+
82+
document.addEventListener('DOMContentLoaded', () => start());
6483
</script>
6584
6685
{% endblock %}

python/src/templates/duplicate.html

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,14 @@ <h2 class="mb-4" id="header_main">
9999
],
100100
pending: true,
101101
lang: "ar",
102-
paging: true,
102+
paging: false,
103+
info: false,
103104
searching: true,
104105
responsive: {
105106
details: true
106107
// display: $.fn.dataTable.Responsive.display.modal()
107108
},
108-
pageLength: 100
109+
pageLength: 1000
109110

110111
})
111112
});
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
{% extends "main.php" %}
2+
{% block title %}
3+
<title>المفردات المتشابهة</title>
4+
{% endblock %}
5+
6+
{% block content %}
7+
<div class="container my-2">
8+
<div class="card shadow rounded-4">
9+
<div class="card-header">
10+
<div class="d-flex align-items-center justify-content-between m-1 p-1">
11+
<h2 class="mb-4" id="header_main">
12+
مقارنة وجود مفردات مكررة
13+
</h2>
14+
</div>
15+
</div>
16+
<div class="card-body">
17+
<div class="tab-content row list-group mt-3" id="myTabContent">
18+
19+
<table class="P11038_table table display table-striped table-bordered table_header_right">
20+
<thead>
21+
<tr>
22+
<th>#</th>
23+
<th>lemma</th>
24+
<th>الفئة</th>
25+
<th>العدد</th>
26+
<th>المفردات</th>
27+
<th>مقارنة</th>
28+
</tr>
29+
</thead>
30+
<tbody id="table_body">
31+
{%for category, data in result.items() %}
32+
{% for lemma, members in data.lemmas.items() %}
33+
<tr>
34+
<td>{{loop.index}}</td>
35+
<td>{{lemma}}</td>
36+
<td>
37+
<a href="https://www.wikidata.org/entity/{{ data.category }}"
38+
target="_blank">{{ data.categoryLabel }}</a>
39+
</td>
40+
<td>
41+
{{ members|length }}
42+
</td>
43+
<td>
44+
<div class="w-100">
45+
<div class="list-group list-group-horizontal">
46+
{%for item in members %}
47+
<div class="list-group-item m-1">
48+
<a href="https://www.wikidata.org/entity/{{item.item}}" target="_blank">
49+
<span class="fs-2">
50+
{{item.lemma}}
51+
</span>
52+
</a>
53+
</div>
54+
{%endfor%}
55+
</div>
56+
</div>
57+
</td>
58+
<td>
59+
<a href="compare.php?qids={{ members | map(attribute='item') | join(',') }}"
60+
target="_blank">
61+
مقارنة
62+
</a>
63+
</td>
64+
</tr>
65+
{%endfor%}
66+
{%endfor%}
67+
</tbody>
68+
</table>
69+
</div>
70+
</div>
71+
</div>
72+
</div>
73+
<script src="/static/js/lex/data.js"></script>
74+
<script src="/static/js/table_filter2.js"></script>
75+
<script src="/static/js/lex/lex_data.js"></script>
76+
<script src="/static/js/lex/lex_example.js"></script>
77+
<script src="/static/js/lex/lex.js"></script>
78+
<script src="/static/js/lex/fetch.js"></script>
79+
<script src="/static/js/lex/lex_page.js"></script>
80+
<script>
81+
82+
/**
 * Put the given lexeme id into the #lexemeId input (when the page has one)
 * and start loading it through start_lexeme().
 */
function setExample(lexeme) {
    // NOTE(review): this template's own markup has no #lexemeId element;
    // guard the lookup so a missing input does not throw and abort the rest
    // of the ready handler that calls this function.
    const input = document.getElementById('lexemeId');
    if (input) {
        input.value = lexeme;
    }
    start_lexeme(lexeme);
}
86+
87+
$(document).ready(function () {
88+
// if ?lex=324 in url then load it setExample
89+
const urlParams = new URLSearchParams(window.location.search);
90+
const lex = urlParams.get('wd_id');
91+
if (lex) {
92+
setExample(lex);
93+
}
94+
load_search(setExample, 'wd_id', 'autocomplete-results');
95+
// ---
96+
$(".P11038_table").DataTable({
97+
order: [
98+
[3, "desc"]
99+
],
100+
pending: true,
101+
lang: "ar",
102+
paging: false,
103+
info: false,
104+
searching: true,
105+
responsive: {
106+
details: true
107+
// display: $.fn.dataTable.Responsive.display.modal()
108+
},
109+
pageLength: 1000
110+
111+
})
112+
});
113+
</script>
114+
115+
116+
{% endblock %}

0 commit comments

Comments
 (0)