Skip to content

Commit e3ada91

Browse files
committed
added alliteration file
1 parent db46b75 commit e3ada91

File tree

8 files changed

+129
-81
lines changed

8 files changed

+129
-81
lines changed
-1.48 MB
Binary file not shown.
-2.21 MB
Binary file not shown.

doc/README.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/carmina/alliteration.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
2+
This module allows users to search for alliteration
3+
within one line of poetry.
4+
"""
5+
6+
def proximity_score(list, a, b):
    """
    Return how many positions separate ``a`` and ``b`` in ``list``.

    The search starts at the first occurrence of ``a``. The nearest
    occurrence of ``b`` at or after that position is preferred; if ``b``
    only occurs earlier in the sequence, the distance back to the closest
    earlier occurrence is returned, so the result is always a non-negative
    (absolute) difference in indices.

    Inputs:
        list (sequence of str): The words of one line, in order. The name
            shadows the builtin but is kept so existing keyword callers
            keep working.
        a (str): The word to measure from. Must occur in ``list``.
        b (str): The word to measure to. Must occur in ``list``.

    Outputs:
        (int): The absolute difference in indices. When ``a == b`` this is
            the distance to the next occurrence of the same word, or 0 if
            the word occurs only once.

    Raises:
        ValueError: If ``a`` or ``b`` does not occur in ``list``.
    """
    start = list.index(a)

    # When both words are identical, skip past ``a`` itself; otherwise the
    # search would immediately "find" b at distance 0.
    first_candidate = start + 1 if a == b else start
    try:
        return list.index(b, first_candidate) - start
    except ValueError:
        if a == b:
            # The word is not repeated: it is at distance 0 from itself.
            return 0

    # ``b`` does not occur after ``a``; walk backwards to the closest
    # earlier occurrence so the documented absolute difference holds.
    for offset in range(1, start + 1):
        if list[start - offset] == b:
            return offset

    raise ValueError(f"{b!r} is not in list")
14+
15+
def check_proximity_allit(score, proximity=4):
    """
    Decide whether two words are close enough to alliterate.

    Intended to be used in conjunction with proximity_score.

    Inputs:
        score (int): Distance between two words, in word positions.
        proximity (int): Largest distance still counted as part of the
            same alliteration. Defaults to 4.

    Outputs:
        (bool): True if the score is less than or equal to the proximity,
            False otherwise.
    """
    return score <= proximity
23+
24+
def find_letters(line):
    """
    Group the words of a line of poetry by their (lowercased) first letter.

    Inputs:
        line (iterable of str): The words of one line, in order.
            NOTE(review): a plain string would be iterated character by
            character, so callers should split the line into words first.

    Outputs:
        (tuple): Two dictionaries keyed by initial letter.
            letter_counts maps each letter to the number of words that
            begin with it; word_tracker maps each letter to the list of
            those words, lowercased and in order of appearance.
    """
    letter_counts = {}
    word_tracker = {}

    for raw_word in line:
        # An empty string has no first letter; skip it rather than crash
        # with an IndexError.
        if not raw_word:
            continue

        word = raw_word.lower()
        initial = word[0]
        letter_counts[initial] = letter_counts.get(initial, 0) + 1
        word_tracker.setdefault(initial, []).append(word)

    return letter_counts, word_tracker
44+
45+
def find_allit(line, letter_counts, word_tracker, proximity=4):
    """
    Count "proper" alliterative pairs in one line of poetry.

    A pair is two consecutive words sharing an initial letter (consecutive
    within that letter's word list) whose positions in the line are no
    more than ``proximity`` apart.

    Inputs:
        line (list of str): The words of the line, in order.
        letter_counts (dict): Initial letter -> number of words starting
            with it, as returned by find_letters.
        word_tracker (dict): Initial letter -> words starting with it, in
            order of appearance, as returned by find_letters.
        proximity (int): Largest distance, in word positions, allowed
            between the two words of a pair. Defaults to 4.

    Outputs:
        (tuple): (allit_pairs, pairs), where allit_pairs is the number of
            alliterative pairs found and pairs is the number of adjacent
            word pairs in the line (0 for an empty or one-word line).

    Raises:
        ValueError: If a tracked word does not occur in ``line``.
    """
    # Tracked words are lowercased, so compare against a lowercased copy.
    lowered = [word.lower() for word in line]

    # Never report a negative pair count for an empty line.
    pairs = max(len(line) - 1, 0)
    allit_pairs = 0

    for letter, count in letter_counts.items():
        if count < 2:
            continue

        # Resolve each tracked word to its actual position in the line.
        # Searching forward from just past the previous match keeps
        # repeated words distinct instead of matching the first one twice.
        positions = []
        cursor = 0
        for word in word_tracker[letter]:
            cursor = lowered.index(word.lower(), cursor)
            positions.append(cursor)
            cursor += 1

        allit_pairs += sum(
            1
            for left, right in zip(positions, positions[1:])
            if right - left <= proximity
        )

    return allit_pairs, pairs

src/carmina/scansion.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@
66
- A syllabification function that splits a Latin line into syllables.
77
- A function to check whether a syllable is long or short based on
88
Latin metrical rules.
9+
10+
Update as of 13 June 2025:
11+
The syllabify_word function does not properly divide the syllables
12+
in cases where a single consonant is followed by a consonant cluster.
13+
14+
The is_long function needs to be reworked to consider the context of
15+
the entire line rather than looking at each syllable individually.
16+
Currently, it does not properly reflect the algorithmic process used
17+
in manual scansion to determine syllable length.
18+
19+
As the hexameter_line and hexameter_text functions both depend on
20+
these two functions working correctly, neither will produce the
21+
correct output.
922
"""
1023

1124
# Define Latin vowels and diphthongs
@@ -45,7 +58,6 @@ def syllabify_word(text):
4558
current_syllable += text[i]
4659
i += 1
4760
else:
48-
# If it's a consonant, add it to the syllable
4961
current_syllable += text[i]
5062
i += 1
5163

@@ -57,8 +69,9 @@ def syllabify_word(text):
5769
current_syllable = "" # Reset the syllable container
5870

5971
# If anything remains in current_syllable after the loop, append it
72+
# to the last syllable
6073
if current_syllable:
61-
syllables.append(current_syllable)
74+
syllables[-1] = syllables[-1] + current_syllable
6275

6376
return syllables
6477

@@ -79,7 +92,6 @@ def syllabify_line(line):
7992
syllabified_line = []
8093

8194
for text in words:
82-
# Syllabify each word and join syllables with dashes
8395
syllabified_line.extend(syllabify_word(text.lower()))
8496

8597
return syllabified_line
@@ -94,14 +106,15 @@ def print_syllabified_line(syllabified_line):
94106
95107
Outputs:
96108
(str): The syllabified line, with syllables
97-
separated by a dash (-) and words by
98-
a pipe (|)
109+
separated by a dash (-)
99110
"""
100-
return " | ".join(syllabified_line)
111+
return " - ".join(syllabified_line)
101112

102113

103114
def is_long(syllable):
104115
"""
116+
UNDER DEVELOPMENT
117+
105118
Determines whether a given syllable is long according to Latin metrical
106119
rules.
107120
@@ -141,6 +154,8 @@ def is_long(syllable):
141154

142155
def hexameter_line(line):
143156
"""
157+
UNDER DEVELOPMENT
158+
144159
Single-line hexameter parsing
145160
146161
Assuming input is:
@@ -182,5 +197,13 @@ def hexameter_line(line):
182197
def hexameter_text(lines):
183198
"""
184199
Multi-line hexameter parsing
200+
201+
Inputs:
202+
lines (list[str]): A list of lines to be parsed
203+
into hexameter
204+
205+
Outputs:
206+
A list containing the metrical patterns for each
207+
line in the input
185208
"""
186209
return [hexameter_line(line) for line in lines]

src/tests/test_parser.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
from carmina.examples import EXAMPLE_PATH
66
from carmina import parser
77

8-
# TODO: @Hui-Hsuan @Simon Write functions for parse_txt and parse_xml.
9-
# TODO: Create expected outputs @Hui-Hsuan
10-
# TODO: Write and document test_parse_txt @Simon
11-
# TODO: Write and document test_parse_xml @Hui-Hsuan
12-
13-
# TODO: Create expected output from .txt files.
14-
# Outputs should be lists of strings, where each line is a separate string
15-
# and all words are normalized (i.e., lowercase without punctuation)
168
expected_single_mixed = ["arma virumque cano troiae qui primus ab oris"]
179
expected_multiline = ['arma virumque cano troiae qui primus ab oris',
1810
'italiam fato profugus laviniaque venit litora',
@@ -23,22 +15,32 @@
2315
expected_mixed_elision = ["litora multum ille et terris iactatus et alto"]
2416
expected_single_m_elision = ["venturum excidio libyae sic volvere parcas"]
2517
expected_single_vowel_elision = ["turbine corripuit scopuloque infixit acuto"]
26-
# pseudo-test:
27-
# test_parse_txt <- result of this should be == expected
28-
# test_parse_xml <- result of this should be == expected
2918

3019
def test_parse_txt():
3120
# mixed_elision
21+
path_file = os.path.join(EXAMPLE_PATH, "aeneid_mixed_elision.txt")
22+
result = parser.parse_txt(path_file)
23+
assert result == expected_mixed_elision
3224

3325
# multiline
26+
path_file = os.path.join(EXAMPLE_PATH, "aeneid_multiline.txt")
27+
result = parser.parse_txt(path_file)
28+
assert result == expected_multiline
3429

3530
# single_m_elision
31+
path_file = os.path.join(EXAMPLE_PATH, "aeneid_single_m_elision.txt")
32+
result = parser.parse_txt(path_file)
33+
assert result == expected_single_m_elision
3634

3735
# single_mixed
36+
path_file = os.path.join(EXAMPLE_PATH, "aeneid_single_mixed.txt")
37+
result = parser.parse_txt(path_file)
38+
assert result == expected_single_mixed
3839

3940
# single_vowel_elision
40-
41-
pass
41+
path_file = os.path.join(EXAMPLE_PATH, "aeneid_single_vowel_elision.txt")
42+
result = parser.parse_txt(path_file)
43+
assert result == expected_single_vowel_elision
4244

4345
def test_parse_xml():
4446
# mixed_elision

user-stories/components.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
# Components
22

33
## Language Verification Logic
4+
45
**What it does:** Verifies that the text is in Latin
56
**Inputs:** A `.txt` or `.xml` file
67
**Outputs:** True or false (true if text is Latin, false otherwise)
78
**Components:** Reader that verifies that the text is in Latin, likely against Logeion (online Latin dictionary site).
89
**Side effects?** Will work as part of the larger Data Validation Logic
910

1011
## Data Validation Logic
11-
**What it does:** Verfies the text is in latin, in a txt or xml.file, and reads as poetry.
12+
13+
**What it does:** Verifies that the text is in Latin, is stored in a `.txt` or `.xml` file, and reads as poetry.
1214
**Inputs:** Must be able to read the file and be able to create a `.txt` or `.xml` copy/version of the file.
1315
**Outputs:** Writes the results to another file, in `.txt` or `.xml` format, that the user can download.
1416
**Components:** A verification reader that helps verify that the language is Latin. This will come from an external database (i.e., the dictionary site we referred to above).
15-
**Side effects?** Not sure.
17+
**Side effects?** Not sure.
1618

1719
## File Parser
20+
1821
**What it does:** Converts the input .txt or .xml file into a structured, machine-readable format for further analysis. Ensures the file is properly segmented into lines of text, identifies headers or metadata, and formats content for compatibility with the scansion function.
1922
**Inputs:** A .txt or .xml file containing Latin text (validated by the Data Validation Logic).
2023
**Outputs:** A structured file or object containing parsed lines of text (e.g., a list or data frame where each line is an element).
@@ -25,8 +28,17 @@ Metadata Extractor: Optionally identifies and stores non-poetic metadata (e.g.,
2528
Side effects? If the input file has formatting issues, parsing errors may occur, requiring fallback mechanisms or error messages for the user.
2629

2730
## Scansion Function
31+
2832
**What it does:** Given a file of dactylic hexameter, returns the scansion and feet of each line
2933
**Inputs:** A parsed Latin file
3034
**Outputs:** Another file that shows the original text and the scansion
3135
**Components:** This is one of the functions that we will write as part of the package. Needs to be able to read lines from a parsed file and apply the scanning algorithm to determine the placement of long and short syllables.
3236
**Side effects?** Probably will not affect other functions, but is dependent on proper parsing and verification
37+
38+
## Alliteration Function
39+
40+
**What it does:** Given a line of hexameter, returns any instances of alliteration
41+
**Inputs:** A parsed Latin file
42+
**Outputs:** A data structure mapping the alliterated letter and its occurrences
43+
**Components:** This is one of the functions that we will write as part of the package. Needs to be able to read lines from a parsed file and apply the alliteration algorithm to detect words that begin with the same initial letter
44+
**Side effects?** Similar to the scansion function, this will probably not affect other functions, but is dependent on proper parsing and verification

user-stories/userstories.md

Lines changed: 5 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,6 @@
1-
# User Stories
1+
# User Stories
22

3-
## Story #1
4-
**User:** Dr.Julia is a univerisity professor specializing in Latin literature.
5-
**Goal:** Dr. Julia wants to analyze the meter of verses in the Aeneid to teach students about the usage of dactylic hexameter.
6-
**Needs and Desires:** Dr. Julia needs a reliable and precise tool for metrical analysis that can automatically identify and parse dactylic patterns. She values accuracy, and the ability to generate detailed, exportable reports for classroom examples.
7-
**Skill Level:** She's an expert in Latin poetry with intermediate programming skills, familiar with Python but not advanced with NLP libraries.
8-
9-
## Story #2
10-
**User:** Sonia is a graduate student in Classics.
11-
**Goal:** Sonia wants to use Carmina to analyze the meter in several latin poetry pieces specifically looking at deviations from traditional dactylic hexameter.
12-
**Needs and Desires:** Sonia wants a flexible tool that can identify metrical variations and provide simple visualizations for research presentations. She values a user-friendly interface and documentation that explains the function of a parameter.
13-
**Skill Level:** She is proficient in Latin, beginner in programming, with limited experience in NLP and Python packages.
14-
15-
## Story #3
16-
**User:** Maria is a high school Latin teacher.
17-
**Goal:** Maria wants Carmina to help her demonstrate metrical patterns of poetry of Ovid to her students as part of an introductory Latin poetry course.
18-
**Needs and Desires:** She wants a straightforward tool that provides clear outputs such as labeling each foot in a line of poetry. She values simplicity over complexity, preferring a few well-documented functions over extensive customization.
19-
**Skill Level:** Maria is proficient in Latin, minimal programming experience; requires a tool that works with simple commands and minimal setup.
20-
21-
## Story #4
22-
**User:** Dr. Roberto is a digital humanities researcher focused on Latin poetry.
23-
**Goal:** Dr. Roberto wants to conduct metrical analysis on Latin poetry collections to identify broader trends in meter across authors.
24-
**Needs and Desires:** He needs a tool that allows for batch processing of texts, with customizable parameters for different poetic meters. Dr. Roberto values customization and the ability to integrate Carmina’s outputs with other data analysis tools.
25-
**Skill Level:** Advanced user with strong programming skills, familiar with Python and NLP concepts.
26-
27-
## Story #5
28-
**User:** Sarah is an undergraduate majoring in Classics and minoring in Data Science.
29-
**Goal:** Sarah wants to analyze metrical patterns in selected poems for her final project, aiming to understand the influence of meter on thematic elements in Latin literature.
30-
**Needs and Desires:** She needs a tool that offers guided usage with example datasets and has an intuitive API that makes it easy to analyze smaller sections of text. She values accessible documentation and error messages that provide clear troubleshooting information.
31-
**Skill Level:** Intermediate Latin knowledge, beginner Python skills, familiar with basic functional programming but not object-oriented programming.
32-
33-
## 1. Metrical Analysis of Latin Verses
34-
As a Classics researcher
35-
I want to automatically identify the meter and foot structure of Latin verses
36-
So that I can perform metrical analysis on poems without manually scanning each line.
37-
38-
## 2. Rhythm Pattern Mapping Across Stanzas
39-
As a Classics researcher interested in rhythmic variation
40-
I want a tool that maps rhythm and stress patterns across multiple stanzas
41-
So that I can identify rhythmic shifts and recurring patterns that contribute to the poem’s overall structure and mood.
42-
43-
## 3. Visualizing Rhythm and Stress Patterns
44-
As a Latin poetry researcher interested in auditory analysis
45-
I want a feature to visualize the rhythm and stress patterns of a poem
46-
So that I can understand the poem’s auditory impact and perform better prosodic analysis.
47-
48-
## 4. Comparative Metrical Analysis Across Poems
49-
50-
As a Latin poetry scholar
51-
I want to compare metrical patterns across multiple poems or authors
52-
So that I can analyze stylistic variations in metrical choices and determine if certain meters correlate with specific themes or genres.
53-
54-
## 5. Foot Structure Visualization and Analysis
55-
As a student learning Latin poetry structure
56-
I want to visualize each foot’s structure within a line
57-
So that I can better understand the function and rhythm of different feet in Latin meter.
3+
The following are some user stories to illustrate potential use cases and users for the `carmina` functionality.
584

595
## Undergrad student
606

@@ -66,12 +12,12 @@ Cicero is working on his PhD. He wants to use the `carmina` package to track ins
6612

6713
## DH Scholar
6814

69-
Vitruvius is interested in looking at alliteration across dactylic hexameter. He doesn't know much Latin, but he does know how to use Python for text analysis. He's done alliteration studies in other natural languages.
15+
Vitruvius is interested in looking at alliteration across dactylic hexameter. He doesn't know much Latin, but he does know how to use Python for text analysis. He's done alliteration studies in other natural languages. He would like to use `carmina` to break down alliteration across several lines of hexameter.
7016

7117
## DH Scholar 2
7218

73-
Hypatia is working with Vitruvius's lab, but she's more interested in looking at alliterative figures within the same line. She's better versed in Latin and Python.
19+
Hypatia is working with Vitruvius's lab, but she's more interested in looking at alliterative figures within the same line. She knows more Latin, but she's new to Python. She will need an intuitive guide to use `carmina`'s alliteration analysis functions in each line of interest.
7420

7521
## New Latin Student
7622

77-
Julius is new to Latin epic, but he doesn't know a lot about scansion. However, his Latin class has a scansion assignment due tomorrow, and he doesn't know how to do it! On the other hand, he does know some Python, and he's familiar enough with Latin to know how to read it. He'll try to use the hexameter scansion function from `carmina` to finish his homework.
23+
Martial is new to Latin epic and doesn't know a lot about scansion. However, his Latin class has a scansion assignment due tomorrow, and he doesn't know how to do it! On the other hand, he does know some Python, and he's familiar enough with Latin to know how to read it. He'll try to use the hexameter scansion function from `carmina` to finish his homework.

0 commit comments

Comments
 (0)