Skip to content

Commit 254f473

Browse files
committed
Initial pipeline code
1 parent 631f9dd commit 254f473

File tree

16 files changed

+767
-0
lines changed

16 files changed

+767
-0
lines changed

.gitignore

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
*.egg-info/
24+
.installed.cfg
25+
*.egg
26+
MANIFEST
27+
28+
# PyInstaller
29+
# Usually these files are written by a python script from a template
30+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
31+
*.manifest
32+
*.spec
33+
34+
# Installer logs
35+
pip-log.txt
36+
pip-delete-this-directory.txt
37+
38+
# Unit test / coverage reports
39+
htmlcov/
40+
.tox/
41+
.coverage
42+
.coverage.*
43+
.cache
44+
nosetests.xml
45+
coverage.xml
46+
*.cover
47+
.hypothesis/
48+
.pytest_cache/
49+
50+
# Translations
51+
*.mo
52+
*.pot
53+
54+
# Django stuff:
55+
*.log
56+
local_settings.py
57+
db.sqlite3
58+
59+
# Flask stuff:
60+
instance/
61+
.webassets-cache
62+
63+
# Scrapy stuff:
64+
.scrapy
65+
66+
# Sphinx documentation
67+
docs/_build/
68+
69+
# PyBuilder
70+
target/
71+
72+
# Jupyter Notebook
73+
.ipynb_checkpoints
74+
75+
# pyenv
76+
.python-version
77+
78+
# celery beat schedule file
79+
celerybeat-schedule
80+
81+
# SageMath parsed files
82+
*.sage.py
83+
84+
# Environments
85+
.env
86+
.venv
87+
env/
88+
venv/
89+
ENV/
90+
env.bak/
91+
venv.bak/
92+
93+
# Spyder project settings
94+
.spyderproject
95+
.spyproject
96+
97+
# mypy
98+
.mypy_cache/
99+
.idea/*
100+
.vscode/settings.json

setup.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import os
2+
import re
3+
4+
import setuptools
5+
6+
# Absolute path of the folder containing this setup script.
directory = os.path.dirname(os.path.abspath(__file__))

# The version is hard-coded for now. The disabled snippet below would instead
# read ``__version__`` from the package's ``__init__.py``.
# path = os.path.join(directory, 'sota_extractor2', '__init__.py')
# with open(path) as read_file:
#     text = read_file.read()
# pattern = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]", re.MULTILINE)
# version = pattern.search(text).group(1)
version = "2.0-alpha"

# The long description is left empty for now. The disabled snippet below
# would instead load it from README.md.
# path = os.path.join(directory, 'README.md')
# with open(path) as read_file:
#     long_description = read_file.read()
long_description = ""

# Package metadata, collected first and then handed to setuptools in one call.
# The URL and license values are still placeholders.
_setup_kwargs = {
    'name': 'sota_extractor2',
    'version': version,
    'url': 'https://...',
    'description': 'System for extracting data from arxiv papers',
    'long_description_content_type': 'text/markdown',
    'long_description': long_description,
    'license': '???',
    'packages': setuptools.find_packages(),
    'include_package_data': True,
    'keywords': 'machine-learning ai information-extraction weak-supervision',
    'classifiers': [
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Bio-Informatics',
        'Topic :: Scientific/Engineering :: Information Analysis',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3',
    ],
    # Optional extra links.
    'project_urls': {
        'Homepage': 'https://...',
        'Source': 'https://...',
        'Bug Reports': 'https://...',
        'Citation': 'https://...',
    },
}

setuptools.setup(**_setup_kwargs)

sota_extractor2/__init__.py

Whitespace-only changes.

sota_extractor2/config.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import logging
2+
from pathlib import Path
3+
4+
# Configure the root logger at import time: timestamped records that carry
# the level and logger name, emitted at WARNING level and above.
logging.basicConfig(
    level=logging.WARNING,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
)

# GraphQL endpoint; used only when data is fetched dynamically.
graphql_url = 'http://10.0.1.145:8001/graphql/'

# Otherwise these files are used instead.
data = Path("/mnt/efs/pwc/data")
goldtags_dump = data.joinpath("dumps", "goldtags-2019.06.28_0916.json.gz")

# Elasticsearch connection settings.
elastic = {'hosts': ['localhost'], 'timeout': 20}

sota_extractor2/data/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import logging
2+
from .. import config # to get logging init
3+
4+
logger = logging.getLogger(__name__)
5+
6+
# Choose the data backend at import time: try the sibling ``db`` module first
# and fall back to the JSON names when it cannot be imported.
try:
    # Explicit relative import: under Python 3 a bare ``from db import *``
    # looks for a top-level ``db`` module and never finds the sibling db.py.
    from .db import *
except Exception:
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
    # and SystemExit still propagate instead of being silently swallowed.
    logger.info("Unable to intialise django falling back to json data")
    # NOTE(review): this absolute import resolves to the standard-library
    # ``json`` package. If a sibling ``json.py`` data module is what is
    # intended here, it must be ``from .json import *`` - confirm.
    from json import *

sota_extractor2/data/db.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Placeholder module: importing it always fails with NotImplementedError.
raise NotImplementedError

0 commit comments

Comments
 (0)