Skip to content

Commit 5e04e3e

Browse files
authored
Merge pull request #13 from SkBlaz/copilot/fix-12
Improve repository code quality: fix syntax errors, modernize packaging, enhance CI/CD, and resolve test failures
2 parents 3375804 + 4fc3b99 commit 5e04e3e

File tree

8 files changed: 113 additions and 69 deletions

.github/workflows/core-install.yml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
# This workflow will install Python dependencies, run tests and lint with a single version of Python
2-
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3-
4-
name: Build
1+
name: Build and Test
52

63
on:
74
push:
@@ -11,15 +8,14 @@ on:
118

129
jobs:
1310
build:
14-
1511
runs-on: ubuntu-latest
1612

1713
steps:
18-
- uses: actions/checkout@v2
14+
- uses: actions/checkout@v4
1915
- name: Set up Python 3.11
20-
uses: actions/setup-python@v2
16+
uses: actions/setup-python@v4
2117
with:
22-
python-version: 3.11
18+
python-version: '3.11'
2319
- name: Install dependencies
2420
run: |
2521
python -m pip install --upgrade pip
@@ -30,7 +26,7 @@ jobs:
3026
# stop the build if there are Python syntax errors or undefined names
3127
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
3228
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
33-
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
29+
flake8 . --count --exit-zero --max-complexity=15 --max-line-length=127 --statistics
3430
- name: Test with pytest
3531
run: |
36-
cd tests; py.test;
32+
python -m pytest tests/ -v

.github/workflows/pylint.yml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
1-
name: Py 3.11
1+
name: Python 3.11
22

33
on: [push]
44

55
jobs:
66
build:
7-
87
runs-on: ubuntu-latest
98

109
steps:
11-
- uses: actions/checkout@v2
10+
- uses: actions/checkout@v4
1211

1312
- name: Set up Python 3.11
14-
uses: actions/setup-python@v2
13+
uses: actions/setup-python@v4
1514
with:
16-
python-version: 3.11
15+
python-version: '3.11'
1716

1817
- name: Install dependencies
1918
run: |

autoBOTLib/__init__.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
import os
2+
import logging
3+
import nltk
4+
5+
# Configure logging first
6+
logging.basicConfig(format='%(asctime)s - %(message)s',
7+
datefmt='%d-%b-%y %H:%M:%S')
8+
logging.getLogger().setLevel(logging.INFO)
9+
10+
# Set environment variables
11+
os.environ['TOKENIZERS_PARALLELISM'] = "false"
12+
13+
# Download NLTK resources
14+
nltk.download('stopwords', quiet=True)
15+
nltk.download('punkt_tab', quiet=True)
16+
nltk.download('averaged_perceptron_tagger_eng', quiet=True)
17+
18+
# Import all module functionality
119
from autoBOTLib.features.features_keyword import *
220
from autoBOTLib.features.features_contextual import *
321
from autoBOTLib.features.features_token_relations import *
@@ -10,17 +28,3 @@
1028
from autoBOTLib.optimization.optimization_feature_constructors import *
1129
from autoBOTLib.optimization.optimization_engine import *
1230
from autoBOTLib.misc.misc_helpers import *
13-
14-
import nltk
15-
nltk.download('stopwords', quiet=True)
16-
nltk.download('punkt_tab', quiet=True)
17-
nltk.download('averaged_perceptron_tagger_eng', quiet=True)
18-
19-
import os
20-
import logging
21-
22-
logging.basicConfig(format='%(asctime)s - %(message)s',
23-
datefmt='%d-%b-%y %H:%M:%S')
24-
logging.getLogger().setLevel(logging.INFO)
25-
26-
os.environ['TOKENIZERS_PARALLELISM'] = "false"

autoBOTLib/__main__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ def main():
4545
"--framework",
4646
default="scikit",
4747
type=str,
48-
help=
49-
"The computational ML back-end to use. Currently supports scikit (Default) and pyTorch (neural nets for sparse inputs)"
48+
help="The computational ML back-end to use. Currently supports scikit (Default) and pyTorch (neural nets for sparse inputs)"
5049
)
5150
parser.add_argument("--memory_storage", default="memory", type=str)
5251
parser.add_argument("--sparsity", default=0.05, type=float)

autoBOTLib/misc/misc_keyword_detection.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,8 @@ def corpus_graph(self,
8080

8181
def process_line(line):
8282

83-
nonlocal G
8483
nonlocal ctx
8584
nonlocal reps
86-
nonlocal dictionary_with_counts_of_pairs
8785

8886
stop = list(string.punctuation)
8987
line = line.strip()

pyproject.toml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
[project]
2+
name = "autoBOTLib"
3+
description = "AutoBOT: Explainable AutoML for texts"
4+
authors = [
5+
{name = "Blaž Škrlj", email = "blaz.skrlj@ijs.si"},
6+
]
7+
license = {text = "BSD-3-Clause-Clear"}
8+
readme = "README.md"
9+
requires-python = ">=3.8"
10+
dynamic = ["version", "dependencies", "classifiers", "scripts"]
11+
12+
[build-system]
13+
requires = ["setuptools>=61.0", "wheel"]
14+
build-backend = "setuptools.build_meta"
15+
16+
[tool.flake8]
17+
max-line-length = 127
18+
max-complexity = 15
19+
ignore = [
20+
"E402", # module level import not at top of file (temporarily ignore due to NLTK downloads)
21+
"F401", # imported but unused (temporarily ignore due to wildcard imports)
22+
"F403", # star import used (temporarily ignore while maintaining compatibility)
23+
"W503", # line break before binary operator
24+
]
25+
exclude = [
26+
".git",
27+
"__pycache__",
28+
".pytest_cache",
29+
"build",
30+
"dist",
31+
"*.egg-info",
32+
".tox",
33+
".venv",
34+
]
35+
36+
[tool.pytest.ini_options]
37+
testpaths = ["tests"]
38+
python_files = ["test_*.py", "*_test.py"]
39+
python_classes = ["Test*"]
40+
python_functions = ["test_*"]
41+
addopts = [
42+
"-v",
43+
"--tb=short",
44+
]
45+
46+
[tool.isort]
47+
profile = "black"
48+
multi_line_output = 3
49+
line_length = 127
50+
known_first_party = ["autoBOTLib"]

setup.py

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,22 @@
1-
from os import path
1+
from pathlib import Path
22
from setuptools import setup, find_packages
3-
from setuptools.command.install import install
4-
import subprocess
5-
import sys
3+
64

75
def parse_requirements(file):
6+
"""Parse requirements from requirements.txt file."""
87
required_packages = []
9-
with open(path.join(path.dirname(__file__), file)) as req_file:
10-
for line in req_file:
11-
# Exclude any comments or empty lines
12-
line = line.strip()
13-
if line and not line.startswith("#"):
14-
required_packages.append(line)
8+
requirements_path = Path(__file__).parent / file
9+
try:
10+
with open(requirements_path) as req_file:
11+
for line in req_file:
12+
# Exclude any comments or empty lines
13+
line = line.strip()
14+
if line and not line.startswith("#"):
15+
required_packages.append(line)
16+
except FileNotFoundError:
17+
print(f"Warning: {file} not found. Using default requirements.")
1518
return required_packages
1619

17-
class PostInstallCommand(install):
18-
"""Post-installation for downloading NLTK resources."""
19-
def run(self):
20-
install.run(self)
21-
22-
try:
23-
import nltk
24-
except ImportError:
25-
print("NLTK is not installed. Installing NLTK...")
26-
subprocess.check_call([sys.executable, "-m", "pip", "install", "nltk"])
27-
import nltk
28-
try:
29-
print("Downloading NLTK 'stopwords' resource...")
30-
for lib in ['stopwords', 'punkt_tab', 'averaged_perceptron_tagger_eng']:
31-
subprocess.check_call([sys.executable, "-m", "nltk.downloader", lib])
32-
print(f"NLTK {lib} downloaded successfully.")
33-
except subprocess.CalledProcessError as e:
34-
print(f"Failed to download NLTK 'stopwords': {e}")
35-
sys.exit(1) # Exit with error code
36-
3720
long_description = """
3821
autoBOT is an AutoML system for text classification with an emphasis on explainability.
3922
It implements the idea of *representation evolution*, learning to combine representations
@@ -58,5 +41,19 @@ def run(self):
5841
packages=packages,
5942
zip_safe=False,
6043
include_package_data=True,
61-
install_requires=parse_requirements("requirements.txt")
44+
install_requires=parse_requirements("requirements.txt"),
45+
classifiers=[
46+
"Development Status :: 4 - Beta",
47+
"Intended Audience :: Developers",
48+
"Intended Audience :: Science/Research",
49+
"License :: OSI Approved :: BSD License",
50+
"Programming Language :: Python :: 3",
51+
"Programming Language :: Python :: 3.8",
52+
"Programming Language :: Python :: 3.9",
53+
"Programming Language :: Python :: 3.10",
54+
"Programming Language :: Python :: 3.11",
55+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
56+
"Topic :: Text Processing :: Linguistic",
57+
],
58+
python_requires=">=3.8",
6259
)

tests/minimal_functionality_test.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
from sklearn.feature_extraction.text import TfidfVectorizer
77
from sklearn import pipeline ## A necessary import
88
import pytest
9+
import os
910

1011

1112
def test_minimal_mlc():
1213
## Load example data frame
13-
dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
14+
dataframe = pd.read_csv("data/insults/train.tsv", sep="\t")
1415
train_sequences = dataframe['text_a']
1516
train_targets_c1 = dataframe['label'].values.tolist()
1617
train_targets_c2 = [
@@ -32,7 +33,7 @@ def test_minimal_mlc():
3233
strategy="direct-learning"
3334
) ## strategy = "direct-learning" trains a single learner.
3435

35-
dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
36+
dataframe2 = pd.read_csv("data/insults/test.tsv", sep="\t")
3637
test_sequences = dataframe2['text_a']
3738
predictions = autoBOTLibObj.predict(test_sequences)
3839
prob_predictions = autoBOTLibObj.predict_proba(test_sequences)
@@ -45,7 +46,7 @@ def test_minimal_mlc():
4546

4647
def test_minimal():
4748
## Load example data frame
48-
dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t").iloc[:500]
49+
dataframe = pd.read_csv("data/insults/train.tsv", sep="\t").iloc[:500]
4950
train_sequences = dataframe['text_a']
5051
train_targets = dataframe['label']
5152

@@ -63,7 +64,7 @@ def test_minimal():
6364
strategy="evolution"
6465
) ## strategy = "direct-learning" trains a single learner.
6566

66-
dataframe2 = pd.read_csv("../data/insults/test.tsv", sep="\t")
67+
dataframe2 = pd.read_csv("data/insults/test.tsv", sep="\t")
6768
test_sequences = dataframe2['text_a']
6869
predictions = autoBOTLibObj.predict(test_sequences)
6970
prob_predictions = autoBOTLibObj.predict_proba(test_sequences)
@@ -82,7 +83,7 @@ def test_minimal():
8283
def test_initializations(fold_number, representation_type, sparsity,
8384
time_constraint):
8485

85-
dataframe = pd.read_csv("../data/insults/train.tsv", sep="\t")
86+
dataframe = pd.read_csv("data/insults/train.tsv", sep="\t")
8687
train_sequences = dataframe['text_a']
8788
train_targets = dataframe['label']
8889
autoBOTLibObj = autoBOTLib.GAlearner(

0 commit comments

Comments
 (0)