infinite0x20
diff --git a/‎doc/CSE 583 Final Presentation 'Carmina' .pdf‎
-1.48 MB b/‎doc/CSE 583 Final Presentation 'Carmina' .pdf‎
-1.48 MB
diff --git a/‎doc/CSE 583 Final Presentation 'Carmina' .pptx‎
-2.21 MB b/‎doc/CSE 583 Final Presentation 'Carmina' .pptx‎
-2.21 MB
diff --git a/‎doc/README.md‎
Lines changed: 0 additions & 1 deletion b/‎doc/README.md‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/carmina/alliteration.py‎
Lines changed: 66 additions & 0 deletions b/‎src/carmina/alliteration.py‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎src/carmina/scansion.py‎
Lines changed: 29 additions & 6 deletions b/‎src/carmina/scansion.py‎
Lines changed: 29 additions & 6 deletions
diff --git a/‎src/tests/test_parser.py‎
Lines changed: 15 additions & 13 deletions b/‎src/tests/test_parser.py‎
Lines changed: 15 additions & 13 deletions
diff --git a/‎user-stories/components.md‎
Lines changed: 14 additions & 2 deletions b/‎user-stories/components.md‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎user-stories/userstories.md‎
Lines changed: 5 additions & 59 deletions b/‎user-stories/userstories.md‎
Lines changed: 5 additions & 59 deletions
@@ -0,0 +1,66 @@
+"""
+This module allows users to search for alliteration
+within one line of poetry. 
+"""
+        
+def proximity_score(list, a, b):
+    '''
+    a and b are two strings in list, with b at or after the first a.
+    Returns how many positions the first such b lies after that a.
+    '''
+    sublist = list[list.index(a):]
+    score = sublist.index(b)
+    return score
+
+def check_proximity_allit(score, proximity=4):
+    '''
+    Intended to be used in conjunction with proximity_score. Returns
+    True if the score is less than or equal to the proximity, meaning
+    that the two words are close enough to be considered part of
+    the same alliteration. Defaults to proximity=4.
+    '''
+    return score <= proximity
+
+def find_letters(line):
+    '''
+    Given a line of poetry (a list of words), returns
+    two dictionaries. letter_counts has the number of words beginning
+    with each letter, while word_tracker has the actual words.
+    '''
+    letter_counts = {}
+    word_tracker = {}
+
+    # get all the words that start with the same letter
+    for word in line:
+        word = word.lower()
+        if word[0] in letter_counts.keys():
+            letter_counts[word[0]] += 1
+            word_tracker[word[0]].append(word)
+        else:
+            letter_counts[word[0]] = 1
+            word_tracker[word[0]] = [word]
+
+    return letter_counts, word_tracker
+
+def find_allit(line, letter_counts, word_tracker, proximity=4):
+    '''
+    Uses the proximity counter to search for proper alliterations.
+    We define a "proper" alliteration in this case to be one in which
+    no two consecutive words are more than the given proximity
+    distance apart.
+    '''
+    pairs = len(line) - 1
+    allit_pairs = 0
+
+    for letter in letter_counts.keys():
+        if letter_counts[letter] >= 2:
+            count = letter_counts[letter]
+            words = word_tracker[letter]
+            for i in range(0, count - 1, 1):
+                subset = line[i:]
+                score = proximity_score(subset, words[i], words[i+1])
+
+                if score <= proximity:
+                    allit_pairs += 1
+
+    return allit_pairs, pairs
@@ -6,6 +6,19 @@
 - A syllabification function that splits a Latin line into syllables.
 - A function to check whether a syllable is long or short based on
   Latin metrical rules.
+
+Update as of 13 June 2025:
+    The syllabify_word function does not properly divide the syllables
+    in cases where a single consonant is followed by a consonant cluster.
+
+    The is_long function needs to be reworked to consider the context of
+    the entire line rather than looking at each syllable individually.
+    Currently, it does not properly reflect the algorithmic process used
+    in manual scansion to determine syllable length.
+
+    As the hexameter_line and hexameter_text functions both depend on
+    these two functions working correctly, neither will produce the
+    correct output.
 """
 
 # Define Latin vowels and diphthongs
@@ -45,7 +58,6 @@ def syllabify_word(text):
                 current_syllable += text[i]
                 i += 1
         else:
-            # If it's a consonant, add it to the syllable
             current_syllable += text[i]
             i += 1
 
@@ -57,8 +69,9 @@ def syllabify_word(text):
             current_syllable = ""  # Reset the syllable container
 
     # If anything remains in current_syllable after the loop, append it
+    #   to the last syllable
     if current_syllable:
-        syllables.append(current_syllable)
+        syllables[-1] = syllables[-1] + current_syllable
 
     return syllables
 
@@ -79,7 +92,6 @@ def syllabify_line(line):
     syllabified_line = []
 
     for text in words:
-        # Syllabify each word and join syllables with dashes
         syllabified_line.extend(syllabify_word(text.lower()))
 
     return syllabified_line
@@ -94,14 +106,15 @@ def print_syllabified_line(syllabified_line):
 
     Outputs:
         (str): The syllabified line, with syllables
-               separated by a dash (-) and words by
-               a pipe (|)
+               separated by a dash (-)
     """
-    return " | ".join(syllabified_line)
+    return " - ".join(syllabified_line)
 
 
 def is_long(syllable):
     """
+    UNDER DEVELOPMENT
+
     Determines whether a given syllable is long according to Latin metrical
     rules.
 
@@ -141,6 +154,8 @@ def is_long(syllable):
 
 def hexameter_line(line):
     """
+    UNDER DEVELOPMENT
+
     Single-line hexameter parsing
 
     Assuming input is:
@@ -182,5 +197,13 @@ def hexameter_line(line):
 def hexameter_text(lines):
     """
     Multi-line hexameter parsing
+
+    Inputs:
+        lines (list[str]): A list of lines to be parsed
+                into hexameter
+
+    Outputs:
+        A list containing the metrical patterns for each
+        line in the input
     """
     return [hexameter_line(line) for line in lines]
@@ -5,14 +5,6 @@
 from carmina.examples import EXAMPLE_PATH
 from carmina import parser
 
-# TODO: @Hui-Hsuan @Simon Write functions for parse_txt and parse_xml.
-# TODO: Create expected outputs @Hui-Hsuan
-# TODO: Write and document test_parse_txt @Simon
-# TODO: Write and document test_parse_xml @Hui-Hsuan
-
-# TODO: Create expected output from .txt files.
-# Outputs should be lists of strings, where each line is a separate string
-#   and all words are normalized (i.e., lowercase without punctuation)
 expected_single_mixed = ["arma virumque cano troiae qui primus ab oris"]
 expected_multiline = ['arma virumque cano troiae qui primus ab oris',
  'italiam fato profugus laviniaque venit litora',
@@ -23,22 +15,32 @@
 expected_mixed_elision = ["litora multum ille et terris iactatus et alto"]
 expected_single_m_elision = ["venturum excidio libyae sic volvere parcas"]
 expected_single_vowel_elision = ["turbine corripuit scopuloque infixit acuto"]
-# pseudo-test:
-# test_parse_txt <- result of this should be == expected
-# test_parse_xml <- result of this should be == expected
 
 def test_parse_txt():
     # mixed_elision
+    path_file = os.path.join(EXAMPLE_PATH, "aeneid_mixed_elision.txt")
+    result = parser.parse_txt(path_file) 
+    assert result == expected_mixed_elision
 
     # multiline
+    path_file = os.path.join(EXAMPLE_PATH, "aeneid_multiline.txt")
+    result = parser.parse_txt(path_file) 
+    assert result == expected_multiline
 
     # single_m_elision
+    path_file = os.path.join(EXAMPLE_PATH, "aeneid_single_m_elision.txt")
+    result = parser.parse_txt(path_file) 
+    assert result == expected_single_m_elision
 
     # single_mixed
+    path_file = os.path.join(EXAMPLE_PATH, "aeneid_single_mixed.txt")
+    result = parser.parse_txt(path_file) 
+    assert result == expected_single_mixed
 
     # single_vowel_elision
-
-    pass
+    path_file = os.path.join(EXAMPLE_PATH, "aeneid_single_vowel_elision.txt")
+    result = parser.parse_txt(path_file) 
+    assert result == expected_single_vowel_elision
 
 def test_parse_xml():
     # mixed_elision
 
@@ -1,20 +1,23 @@
 # Components
 
 ## Language Verification Logic
+
 **What it does:**  Verifies that the text is in Latin  
 **Inputs:**  A `.txt` or `.xml` file  
 **Outputs:**  True or false (true if text is Latin, false otherwise)  
 **Components:**  Reader that verifies that the text is in Latin, likely against Logeion (online Latin dictionary site).  
 **Side effects?**  Will work as part of the larger Data Validation Logic
 
 ## Data Validation Logic
-**What it does:** Verfies the text is in latin, in a txt or xml.file, and reads as poetry.  
+
+**What it does:** Verifies the text is in Latin, in a .txt or .xml file, and reads as poetry.  
 **Inputs:** Must be able to read the file and have be able to create a xml or txt.file copy/version of the file.  
 **Outputs:** Outputs results to another file xml or txt format that is downloadable to the user.  
 **Components:** A verification reader that helps verify the language is latin. This will come from an external database (aka the dictionary site we referred back to).  
-**Side effects?** Not sure. 
+**Side effects?** Not sure.
 
 ## File Parser
+
 **What it does:** Converts the input .txt or .xml file into a structured, machine-readable format for further analysis. Ensures the file is properly segmented into lines of text, identifies headers or metadata, and formats content for compatibility with the scansion function.
 **Inputs:** A .txt or .xml file containing Latin text (validated by the Data Validation Logic).
 **Outputs:** A structured file or object containing parsed lines of text (e.g., a list or data frame where each line is an element).
@@ -25,8 +28,17 @@ Metadata Extractor: Optionally identifies and stores non-poetic metadata (e.g.,
 Side effects? If the input file has formatting issues, parsing errors may occur, requiring fallback mechanisms or error messages for the user.
 
 ## Scansion Function
+
 **What it does:**  Given a file of dactylic hexameter, returns the scansion and feet of each line  
 **Inputs:**  A parsed Latin file  
 **Outputs:**  Another file that shows the original text and the scansion  
 **Components:**  This is one of the functions that we will write as part of the package. Needs to be able to read lines from a parsed file and apply the scanning algorithm to determine the placement of long and short syllables.  
 **Side effects?**  Probably will not affect other functions, but is dependent on proper parsing and verification
+
+## Alliteration Function
+
+**What it does:**  Given a line of hexameter, returns any instances of alliteration  
+**Inputs:**  A parsed Latin file  
+**Outputs:**  A data structure mapping each alliterated letter to its occurrences  
+**Components:**  This is one of the functions that we will write as part of the package. Needs to be able to read lines from a parsed file and apply the alliteration algorithm to detect words that begin with the same letter  
+**Side effects?** Similar to the scansion function, this will probably not affect other functions, but is dependent on proper parsing and verification
@@ -1,60 +1,6 @@
-# User Stories 
+# User Stories
 
-## Story #1 
-**User:** Dr.Julia is a univerisity professor specializing in Latin literature.  
-**Goal:**  Dr. Julia wants to analyze the meter of verses in the Aeneid to teach students about the usage of dactylic hexameter. 
-**Needs and Desires:** Dr. Julia needs a reliable and precise tool for metrical analysis that can automatically identify and parse dactylic patterns. She values accuracy, and the ability to generate detailed, exportable reports for classroom examples. 
-**Skill Level:** She's an expert in Latin poetry with intermediate programming skills, familiar with Python but not advanced with NLP libraries.   
-
- ## Story #2 
-**User:** Sonia is a graduate student in Classics.   
-**Goal:** Sonia wants to use Carmina to analyze the meter in several latin poetry pieces specifically looking at deviations from traditional dactylic hexameter.  
-**Needs and Desires:** Sonia wants a flexible tool that can identify metrical variations and provide simple visualizations for research presentations. She values a user-friendly interface and documentation that explains the function of a parameter.  
-**Skill Level:** She is proficient in Latin, beginner in programming, with limited experience in NLP and Python packages.  
-
-## Story #3 
-**User:** Maria is a high school Latin teacher.   
-**Goal:** Maria wants Carmina to help her demonstrate metrical patterns of poetry of Ovid to her students as part of an introductory Latin poetry course.  
-**Needs and Desires:** She wants a straightforward tool that provides clear outputs such as labeling each foot in a line of poetry. She values simplicity over complexity, preferring a few well-documented functions over extensive customization.   
-**Skill Level:** Maria is proficient in Latin, minimal programming experience; requires a tool that works with simple commands and minimal setup.   
-
-## Story #4 
-**User:** Dr. Roberto is a digital humanities researcher focused on Latin poetry.   
-**Goal:**  Dr. Roberto wants to conduct metrical analysis on Latin poetry collections to identify broader trends in meter across authors.
-**Needs and Desires:** He needs a tool that allows for batch processing of texts, with customizable parameters for different poetic meters. Dr. Roberto values customization and the ability to integrate Carmina’s outputs with other data analysis tools. 
-**Skill Level:** Advanced user with strong programming skills, familiar with Python and NLP concepts. 
-
-## Story #5 
-**User:** Sarah is an undergraduate majoring in Classics and minoring in Data Science. 
-**Goal:** Sarah wants to analyze metrical patterns in selected poems for her final project, aiming to understand the influence of meter on thematic elements in Latin literature. 
-**Needs and Desires:** She needs a tool that offers guided usage with example datasets and has an intuitive API that makes it easy to analyze smaller sections of text. She values accessible documentation and error messages that provide clear troubleshooting information. 
-**Skill Level:** Intermediate Latin knowledge, beginner Python skills, familiar with basic functional programming but not object-oriented programming.
-
-## 1. Metrical Analysis of Latin Verses
-As a Classics researcher
-I want to automatically identify the meter and foot structure of Latin verses
-So that I can perform metrical analysis on poems without manually scanning each line.
-
-## 2. Rhythm Pattern Mapping Across Stanzas
-As a Classics researcher interested in rhythmic variation
-I want a tool that maps rhythm and stress patterns across multiple stanzas
-So that I can identify rhythmic shifts and recurring patterns that contribute to the poem’s overall structure and mood.
-
-## 3. Visualizing Rhythm and Stress Patterns
-As a Latin poetry researcher interested in auditory analysis
-I want a feature to visualize the rhythm and stress patterns of a poem
-So that I can understand the poem’s auditory impact and perform better prosodic analysis.
-
-## 4. Comparative Metrical Analysis Across Poems
-
-As a Latin poetry scholar
-I want to compare metrical patterns across multiple poems or authors
-So that I can analyze stylistic variations in metrical choices and determine if certain meters correlate with specific themes or genres.
-
-## 5. Foot Structure Visualization and Analysis
-As a student learning Latin poetry structure
-I want to visualize each foot’s structure within a line
-So that I can better understand the function and rhythm of different feet in Latin meter.
+The following are some user stories to illustrate potential use cases and users for the `carmina` functionality.
 
 ## Undergrad student
 
@@ -66,12 +12,12 @@ Cicero is working on his PhD. He wants to use the `carmina` package to track ins
 
 ## DH Scholar
 
-Vitruvius is interested in looking at alliteration across dactylic hexameter. He doesn't know much Latin, but he does know how to use Python for text analysis. He's done alliteration studies in other natural languages.
+Vitruvius is interested in looking at alliteration across dactylic hexameter. He doesn't know much Latin, but he does know how to use Python for text analysis. He's done alliteration studies in other natural languages. He would like to use `carmina` to break down alliteration across several lines of hexameter.
 
 ## DH Scholar 2
 
-Hypatia is working with Vitruvius's lab, but she's more interested in looking at alliterative figures within the same line. She's better versed in Latin and Python.
+Hypatia is working with Vitruvius's lab, but she's more interested in looking at alliterative figures within the same line. She knows more Latin, but she's new to Python. She will need an intuitive guide to use `carmina`'s alliteration analysis functions on each line of interest.
 
 ## New Latin Student
 
-Julius is new to Latin epic, but he doesn't know a lot about scansion. However, his Latin class has a scansion assignment due tomorrow, and he doesn't know how to do it! On the other hand, he does know some Python, and he's familiar enough with Latin to know how to read it. He'll try to use the hexameter scansion function from `carmina` to finish his homework.
+Martial is new to Latin epic, and he doesn't know a lot about scansion. However, his Latin class has a scansion assignment due tomorrow, and he doesn't know how to do it! On the other hand, he does know some Python, and he's familiar enough with Latin to know how to read it. He'll try to use the hexameter scansion function from `carmina` to finish his homework.