Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
23e2798
build: bring pyproject.toml up to current standards
davidlday Jul 22, 2025
e96611e
ci: implement full ci workflow
davidlday Jul 22, 2025
af013a4
ci: implement full release workflow
davidlday Jul 22, 2025
0518ac8
ci: implement full publish workflow
davidlday Jul 22, 2025
d44c063
build(deps): general dependency updates
davidlday Jul 22, 2025
d27f004
ci: add mypy dev dependency
davidlday Jul 22, 2025
c503106
ci: add bandit dev dependency
davidlday Jul 22, 2025
4d31515
build: bump pylint from 2.15.8 to 3.3.7
davidlday Jul 22, 2025
a553767
build: bump pytest from 7.2.0 to 8.4.1
davidlday Jul 22, 2025
0e9bb8e
build: remove pylint overrides
davidlday Jul 22, 2025
b91e865
fix(deps): bump narrative from 1.2.0 to 1.2.1
davidlday Jul 23, 2025
f323533
fix(deps): bump click from 8.1.8 to 8.2.1
davidlday Jul 23, 2025
c3b31a5
build: add type hints to Word
davidlday Jul 25, 2025
72049d2
feat: add type hints to Fragment
davidlday Jul 25, 2025
11e8676
feat: add type hints to FragmentContainer
davidlday Jul 25, 2025
49d2125
feat: add type hints to Sentence
davidlday Jul 25, 2025
de16960
feat: add type hints to Paragraph
davidlday Jul 25, 2025
4e25df6
feat: add type hints to Prose
davidlday Jul 25, 2025
8974765
feat: add type hints to ReadabilityScores
davidlday Jul 25, 2025
fe0b573
feat: add type hints to cli
davidlday Jul 25, 2025
65d015f
build: pylint ignore similarities
davidlday Jul 25, 2025
f643f33
feat: add type marker
davidlday Jul 25, 2025
e6cffd4
fix: capitalize constants
davidlday Jul 25, 2025
81e3d55
feat: add type hints to Word
davidlday Jul 25, 2025
9476c28
feat: add type hints to Fragment
davidlday Jul 25, 2025
f37e08b
feat: add type hints to FragmentContainer
davidlday Jul 25, 2025
374b48b
feat: add type hints to Sentence
davidlday Jul 25, 2025
0a2e5c5
feat: add type hints to Paragraph
davidlday Jul 25, 2025
38ade5f
feat: add type hints to Prose
davidlday Jul 25, 2025
b8f85ee
feat: add type hints to ReadabilityScores
davidlday Jul 25, 2025
b9b8b37
feat: add type hints to cli
davidlday Jul 25, 2025
7e68d14
build: pylint ignore similarities
davidlday Jul 25, 2025
242f5e8
feat: add type marker
davidlday Jul 25, 2025
4a211cc
fix: capitalize constants
davidlday Jul 25, 2025
1be7f73
Merge branch 'feat/types' of github.com:prosegrinder/python-prosegrin…
davidlday Jul 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions .github/workflows/pypi-publish.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
name: Publish to PyPi

permissions:
contents: read

on:
push:
tags:
- "v*"

jobs:
lint:
uses: prosegrinder/.github/.github/workflows/poetry-lint.yaml@main
black:
uses: prosegrinder/.github/.github/workflows/poetry-black.yaml@main

pylint:
uses: prosegrinder/.github/.github/workflows/poetry-pylint.yaml@main

mypy:
uses: prosegrinder/.github/.github/workflows/poetry-mypy.yaml@main

bandit:
uses: prosegrinder/.github/.github/workflows/poetry-bandit.yaml@main

test:
needs: lint
needs:
- pylint
- black
- mypy
- bandit
uses: prosegrinder/.github/.github/workflows/poetry-test.yaml@main

publish:
Expand Down
22 changes: 19 additions & 3 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
name: Python Poetry CI

permissions:
contents: read

on:
pull_request:

Expand All @@ -8,11 +11,24 @@ concurrency:
cancel-in-progress: true

jobs:
lint:
uses: prosegrinder/.github/.github/workflows/poetry-lint.yaml@main
black:
uses: prosegrinder/.github/.github/workflows/poetry-black.yaml@main

pylint:
uses: prosegrinder/.github/.github/workflows/poetry-pylint.yaml@main

mypy:
uses: prosegrinder/.github/.github/workflows/poetry-mypy.yaml@main

bandit:
uses: prosegrinder/.github/.github/workflows/poetry-bandit.yaml@main

test:
needs: lint
needs:
- pylint
- black
- mypy
- bandit
uses: prosegrinder/.github/.github/workflows/poetry-test.yaml@main

cz-dry-run:
Expand Down
28 changes: 27 additions & 1 deletion .github/workflows/python-release.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,40 @@
name: Bump Version and Create Release

permissions:
contents: read

on:
push:
branches:
- main

jobs:
black:
uses: prosegrinder/.github/.github/workflows/poetry-black.yaml@main

pylint:
uses: prosegrinder/.github/.github/workflows/poetry-pylint.yaml@main

mypy:
uses: prosegrinder/.github/.github/workflows/poetry-mypy.yaml@main

bandit:
uses: prosegrinder/.github/.github/workflows/poetry-bandit.yaml@main

test:
needs:
- pylint
- black
- mypy
- bandit
uses: prosegrinder/.github/.github/workflows/poetry-test.yaml@main

release:
if: "!startsWith(github.event.head_commit.message, 'bump:')"
needs: test
if: ${{ !startsWith(github.event.head_commit.message, 'bump:') }}
# Skip the release job when the head commit message starts with 'bump:'
permissions:
contents: write
uses: prosegrinder/.github/.github/workflows/poetry-release.yaml@main
secrets:
VERSION_BUMP_TAG_TOKEN: "${{ secrets.VERSION_BUMP_TAG_TOKEN }}"
809 changes: 502 additions & 307 deletions poetry.lock

Large diffs are not rendered by default.

69 changes: 44 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
[project]
name = "prosegrinder"
dynamic = ["version", "classifiers"]
description = "A text analytics library for prose fiction."
license = {text = "GPLv3"}
readme = "README.md"
requires-python = ">=3.9,<4.0"
authors = [
{name = "David L. Day", email = "[email protected]"}
]
keywords = ["text analytics", "prose fiction", "natural language processing", "NLP", "linguistics"]
dependencies = [
"importlib-metadata>=5.1.0",
"cmudict>=1.0.11",
"narrative>=1.1.1",
"pointofview>=1.0.2",
"syllables>=1.0.4",
"click>=8.1.3"
]

[project.scripts]
prosegrinder = "prosegrinder.__main__:cli"

[project.urls]
homepage = "https://github.com/prosegrinder/python-prosegrinder"
repository = "https://github.com/prosegrinder/python-prosegrinder"
"Bug Tracker" = "https://github.com/prosegrinder/python-prosegrinder/issues"

[tool.commitizen]
version = "1.3.8"
tag_format = "v$version"
update_changelog_on_bump = true
changelog_incremental = true
bump_message = "bump: $current_version → $new_version"
version_files = [
"pyproject.toml:version",
]
version_provider = "poetry"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "prosegrinder"
version = "1.3.8"
description = "A text analytics library for prose fiction."
authors = ["David L. Day <[email protected]>"]
license = "GPLv3"
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.7.2"
importlib-metadata = "^5.1.0"
cmudict = "^1.0.11"
narrative = "^1.1.1"
pointofview = "^1.0.2"
syllables = "^1.0.4"
click = "^8.1.3"
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Topic :: Text Processing"
]

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.0"
pylint = "^2.15.8"
black = ">=22.12,<24.0"
mypy = "^1.17.0"
bandit = "^1.8.6"
pylint = "^3.3.7"
pytest = "^8.4.1"

[tool.poetry.scripts]
prosegrinder = "prosegrinder.__main__:cli"

[tool.pylint."messages control"]
disable = ["duplicate-code", "too-many-instance-attributes", "too-many-arguments"]
[tool.pylint.'MESSAGES CONTROL']
disable = "similarities"
6 changes: 3 additions & 3 deletions src/prosegrinder/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@
@click.option(
"-s", "--save", required=False, type=click.File("w"), help="File to save output to."
)
def cli(files, save, indent):
def cli(files: list[click.File], save: click.File, indent: int) -> None:
"""Setup the command line interface"""
processed_files = []
for file in files:
filename = click.format_filename(file.name)
text = file.read()
text = file.read() # type: ignore
_p = Prose(text)
_d = {"filename": filename, "statistics": _p.stats}
processed_files.append(_d)
_j = json.dumps(processed_files, indent=indent)
if save:
save.write(_j)
save.write(_j) # type: ignore
else:
click.echo(_j)
21 changes: 12 additions & 9 deletions src/prosegrinder/fragment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Fragment class for prosegrinder."""

import re
from collections import Counter

Expand All @@ -8,10 +9,10 @@
from prosegrinder.word import Word


class Fragment:
class Fragment: # pylint: disable=too-many-instance-attributes
"""A fragment of text."""

def __init__(self, text, dictionary=Dictionary()):
def __init__(self, text: str, dictionary: Dictionary = Dictionary()):
self.text = text
self.dictionary = dictionary
self.normalized_sentence = dictionary.normalize_text(text)
Expand All @@ -21,8 +22,8 @@ def __init__(self, text, dictionary=Dictionary()):
]
self.word_count = len(self.words)
self.word_character_count = sum(word.character_count for word in self.words)
_pf = Counter()
_pc = 0
_pf: Counter = Counter()
_pc: int = 0
for word in self.words:
_pf.update(word.phone_frequency)
_pc += word.phone_count
Expand All @@ -41,7 +42,7 @@ def __init__(self, text, dictionary=Dictionary()):
self.third_person_word_count = sum(
word.is_third_person_word for word in self.words
)
self.word_frequency = dict(Counter(self.words))
self.word_frequency = dict(Counter(word.text for word in self.words))
self.unique_words = self.word_frequency.keys()
self.unique_word_count = len(self.unique_words)
self.pov = pointofview.NONE
Expand All @@ -52,17 +53,19 @@ def __init__(self, text, dictionary=Dictionary()):
elif self.third_person_word_count > 0:
self.pov = pointofview.THIRD

def __eq__(self, other):
def __eq__(self, other: object) -> bool:
"""Equality operator for instance variables."""
if not isinstance(other, Fragment):
return False
return self.text == other.text

def __hash__(self):
def __hash__(self) -> int:
"""Hash operator for instance variables."""
return hash(self.text)

def frequency(self, word_string):
def frequency(self, word_string: str) -> int:
"""Returns the frequency of a word in the fragment."""
return self.word_frequency[word_string]
return self.word_frequency.get(word_string, 0)

@property
def stats(self):
Expand Down
23 changes: 15 additions & 8 deletions src/prosegrinder/fragment_container.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
"""Fragment container class for prosegrinder."""

from collections import Counter
from collections.abc import Sequence

import pointofview

from prosegrinder.dictionary import Dictionary
from prosegrinder.fragment import Fragment


class FragmentContainer:
class FragmentContainer: # pylint: disable=too-many-instance-attributes
"""A container for fragments."""

def __init__(self, fragments, dictionary=Dictionary()):
def __init__(
self, fragments: Sequence[Fragment], dictionary: Dictionary = Dictionary()
):
self.dictionary = dictionary
self.fragments = fragments or []
self.fragment_count = len(self.fragments)
Expand Down Expand Up @@ -38,9 +43,9 @@ def __init__(self, fragments, dictionary=Dictionary()):
self.third_person_word_count = sum(
fragment.third_person_word_count for fragment in self.fragments
)
_wf = Counter()
_pf = Counter()
_pc = 0
_wf: Counter = Counter()
_pf: Counter = Counter()
_pc: int = 0
for fragment in self.fragments:
_wf.update(fragment.words)
_pf.update(fragment.phone_frequency)
Expand All @@ -58,14 +63,16 @@ def __init__(self, fragments, dictionary=Dictionary()):
elif self.third_person_word_count > 0:
self.pov = pointofview.THIRD

def __eq__(self, other):
def __eq__(self, other: object) -> bool:
"""Equality operator for instance variables."""
if not isinstance(other, FragmentContainer):
return False
return self.fragments == other.fragments

def __hash__(self):
def __hash__(self) -> int:
"""Hash operator for instance variables."""
return hash(self.fragments)

def frequency(self, word_string):
def frequency(self, word_string: str) -> int:
"""Gets the frequency of a word in the fragment container."""
return self.word_frequency[self.dictionary.get_word(word_string)]
11 changes: 8 additions & 3 deletions src/prosegrinder/paragraph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Paragraph class for prosegrinder."""

import re

import narrative
Expand All @@ -14,7 +15,7 @@ class Paragraph(FragmentContainer):

RE_PARAGRAPH = re.compile(".*(?=\\n|$)")

def __init__(self, text, dictionary=Dictionary()):
def __init__(self, text: str, dictionary: Dictionary = Dictionary()):
self.text = text
self.dictionary = dictionary
self.sentences = Sentence.parse_sentences(self.text, self.dictionary)
Expand All @@ -29,12 +30,16 @@ def __init__(self, text, dictionary=Dictionary()):
self.pov = self.narrative.pov
super().__init__(self.sentences, self.dictionary)

def __eq__(self, other):
def __eq__(self, other: object) -> bool:
"""Equality operator for instance variables."""
if not isinstance(other, Paragraph):
return False
return self.text == other.text

@staticmethod
def parse_paragraphs(text, dictionary=Dictionary()):
def parse_paragraphs(
text: str, dictionary: Dictionary = Dictionary()
) -> list["Paragraph"]:
"""Parses a text into a list of Paragraph objects."""
return [
Paragraph(paragraph, dictionary)
Expand Down
Loading