Skip to content

Commit 4cfbec2

Browse files
committed
restore publications
1 parent 91c1d57 commit 4cfbec2

File tree

2 files changed

+89
-5
lines changed

2 files changed

+89
-5
lines changed
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
{
2+
"cells": [
3+
{
4+
"metadata": {
5+
"ExecuteTime": {
6+
"end_time": "2025-08-11T06:43:00.200528Z",
7+
"start_time": "2025-08-11T06:41:23.376700Z"
8+
}
9+
},
10+
"cell_type": "code",
11+
"source": [
12+
"import os\n",
13+
"import dataflows as DF\n",
14+
"from datapackage_pipelines_migdar.flows.dump_to_es import my_dump_to_es, BoostingMappingGenerator\n",
15+
"\n",
16+
"# kubectl port-forward to the migdar elasticsearch\n",
17+
"os.environ[\"DATAFLOWS_ELASTICSEARCH\"] = \"localhost:9200\"\n",
18+
"\n",
19+
"# download from the pipelines pod data/ folder\n",
20+
"data_source = '../data/publications_in_es/datapackage.json'\n",
21+
"\n",
22+
"revision = 202538\n",
23+
"resource_name = 'publications'\n",
24+
"\n",
25+
"DF.Flow(\n",
26+
" DF.load(data_source),\n",
27+
" my_dump_to_es(\n",
28+
" indexes={\n",
29+
" 'migdar__' + resource_name: [\n",
30+
" {\n",
31+
" 'resource-name': resource_name,\n",
32+
" 'revision': revision\n",
33+
" }\n",
34+
" ]\n",
35+
" },\n",
36+
" mapper_cls=BoostingMappingGenerator,\n",
37+
" index_settings={'index.mapping.coerce': True},\n",
38+
" elasticsearch_options=dict(timeout=60)\n",
39+
" ),\n",
40+
").process()"
41+
],
42+
"id": "7f6e9369e2a5b6b7",
43+
"outputs": [
44+
{
45+
"data": {
46+
"text/plain": [
47+
"(<datapackage.package.Package at 0x77a0152f3f90>,\n",
48+
" {'count_of_rows': 4363,\n",
49+
" 'bytes': None,\n",
50+
" 'hash': '5f5818e16db4ba82b5038b9c42c4ea61',\n",
51+
" 'dataset_name': None})"
52+
]
53+
},
54+
"execution_count": 8,
55+
"metadata": {},
56+
"output_type": "execute_result"
57+
}
58+
],
59+
"execution_count": 8
60+
}
61+
],
62+
"metadata": {
63+
"kernelspec": {
64+
"display_name": "Python 3",
65+
"language": "python",
66+
"name": "python3"
67+
},
68+
"language_info": {
69+
"codemirror_mode": {
70+
"name": "ipython",
71+
"version": 2
72+
},
73+
"file_extension": ".py",
74+
"mimetype": "text/x-python",
75+
"name": "python",
76+
"nbconvert_exporter": "python",
77+
"pygments_lexer": "ipython2",
78+
"version": "2.7.6"
79+
}
80+
},
81+
"nbformat": 4,
82+
"nbformat_minor": 5
83+
}

pipeline-spec.yaml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@ zotero_fetch:
2424
- flow: datapackage_pipelines_migdar.flows.zotero
2525

2626

27-
publications:
28-
dependencies:
29-
- pipeline: ./zotero_fetch
30-
pipeline:
31-
- flow: datapackage_pipelines_migdar.flows.publications
27+
# Google Drive was deleted, so the publications pipeline below is disabled; we rely on the existing data in Elasticsearch, restored by notebooks/restore_publications.ipynb
28+
#publications:
29+
# dependencies:
30+
# - pipeline: ./zotero_fetch
31+
# pipeline:
32+
# - flow: datapackage_pipelines_migdar.flows.publications
3233

3334

3435
sitemap:

0 commit comments

Comments
 (0)