Skip to content

Commit 9224e78

Browse files
authored
Add draft for adding links to wikipedia #673
1 parent 4a59071 commit 9224e78

File tree

1 file changed

+141
-0
lines changed

1 file changed

+141
-0
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "57a00977-79ce-4d17-b6cc-49dd54bb2af6",
6+
"metadata": {},
7+
"source": [
8+
"## Import pywikibot and rdflib"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 2,
14+
"id": "80e390c6-2cb3-4c6c-a260-7c21df6a0831",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import pywikibot\n",
19+
"import rdflib"
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"id": "c56365cc-f1ba-4aab-a432-341450258c48",
25+
"metadata": {},
26+
"source": [
27+
"## Query Wikidata for the Wikipedia pages of NWBib spatial concepts/locations"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 3,
33+
"id": "0ccae4b0-e678-48f4-bd8e-a20b232c0220",
34+
"metadata": {
35+
"scrolled": true
36+
},
37+
"outputs": [],
38+
"source": [
39+
"g = rdflib.Graph()\n",
40+
"qres = g.query(\n",
41+
" \"\"\"\n",
42+
"PREFIX wd: <http://www.wikidata.org/entity/>\n",
43+
"PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n",
44+
"PREFIX schema: <http://schema.org/>\n",
45+
"SELECT ?articleName\n",
46+
"WHERE {\n",
47+
" SERVICE <https://query.wikidata.org/sparql> {\n",
48+
" ?item wdt:P6814 ?nwbibId.\n",
49+
" ?article schema:about ?item.\n",
50+
" ?article schema:name ?articleName .\n",
51+
" ?article schema:isPartOf <https://de.wikipedia.org/>.\n",
52+
" }\n",
53+
" }\n",
54+
" \"\"\"\n",
55+
")\n"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": 4,
61+
"id": "fd2e6410-0ccc-4724-ad1c-5117aecd346d",
62+
"metadata": {},
63+
"outputs": [],
64+
"source": []
65+
},
66+
{
67+
"cell_type": "markdown",
68+
"id": "e8f7a562-33de-4291-b345-8aab06b0227f",
69+
"metadata": {},
70+
"source": [
71+
"## Iterate over the query results, check whether each page exists, and fetch the Wikipedia pages"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"id": "7db6d415-22c1-4209-8681-b8e694378c1a",
78+
"metadata": {},
79+
"outputs": [],
80+
"source": [
81+
"file1 = open(\"missing.txt\", \"a\")\n",
82+
"file2 = open(\"alreadyLinked.txt\", \"a\")\n",
83+
"\n",
84+
"for row in qres:\n",
85+
" page = pywikibot.Page(pywikibot.Site(), 'row.articleName')\n",
86+
" pageExists = page.exists()\n",
87+
" if pageExists == True:\n",
88+
" pageText = page.get()\n",
89+
" if \"NWBib\" not in pageText:\n",
90+
" print(page.text)\n",
91+
" # TODO: HOW TO REPLACE THIS IN A SMART WAY\n",
92+
" # test = page.text + \"\\n* {{NWBib}}\"\n",
93+
" # page.put(test, summary='Bot: Test edit')\n",
94+
" else:\n",
95+
" file2.write(row.articleName + \" nwbiblink SCHON DA \\n\")\n",
96+
" print(row.articleName + \" nwbiblink SCHON DA \\n\")\n",
97+
" else:\n",
98+
" file1.write(\"Seite für \" + row.articleName + \" fehlt \\n\")\n",
99+
"file1.close()\n",
100+
"file2.close()"
101+
]
102+
},
103+
{
104+
"cell_type": "code",
105+
"execution_count": 3,
106+
"id": "f69c6f63-b2c0-47aa-8fd4-a72310b789f9",
107+
"metadata": {},
108+
"outputs": [],
109+
"source": []
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"id": "3ed54705-47f7-4c84-a2f0-5282bd531deb",
115+
"metadata": {},
116+
"outputs": [],
117+
"source": []
118+
}
119+
],
120+
"metadata": {
121+
"kernelspec": {
122+
"display_name": "Python 3 (ipykernel)",
123+
"language": "python",
124+
"name": "python3"
125+
},
126+
"language_info": {
127+
"codemirror_mode": {
128+
"name": "ipython",
129+
"version": 3
130+
},
131+
"file_extension": ".py",
132+
"mimetype": "text/x-python",
133+
"name": "python",
134+
"nbconvert_exporter": "python",
135+
"pygments_lexer": "ipython3",
136+
"version": "3.12.3"
137+
}
138+
},
139+
"nbformat": 4,
140+
"nbformat_minor": 5
141+
}

0 commit comments

Comments
 (0)