From 961f724ca667bb93cb518b51ce9ee803518db4ce Mon Sep 17 00:00:00 2001
From: Philipp Burckhardt <pburckhardt@outlook.com>
Date: Sat, 22 Feb 2025 16:59:05 -0500
Subject: [PATCH 1/2] fix: ensure `nlp/sentencize` handles punctuation in
 quotation marks properly

---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: passed
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: na
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
---
 .../@stdlib/nlp/sentencize/lib/main.js        | 37 +++++++++++++++---
 .../@stdlib/nlp/sentencize/test/test.js       | 38 +++++++++++++++++++
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js b/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js
index 8cf46b32640f..e03dba300ab7 100644
--- a/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js
+++ b/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js
@@ -27,11 +27,13 @@ var trim = require( '@stdlib/string/base/trim' );
 
 // VARIABLES //
 
+var RE_LOWERCASE = /^[a-z]+$/;
 var RE_CAPITALIZED = /^[A-Z][a-z]{0,4}$/;
 var RE_CAPITALIZED_PERIOD = /^([A-Z]\.)*[A-Z]$/;
 var RE_NUMBER = /^[0-9]$/;
 var RE_PREFIXES = /^[{[(<:;"'”`]/;
 var RE_SUFFIXES = /[})\]>:;"'”`]$/;
+var RE_QUOTES = /^["'`]$/;
 
 
 // FUNCTIONS //
@@ -51,10 +53,38 @@ var RE_SUFFIXES = /[})\]>:;"'”`]$/;
 * @returns {boolean} boolean indicating whether the token at a specified index is an end-of-sentence token
 */
 function isEndOfSentence( tokens, i ) {
+	var nextToken;
 	var token;
 	var im1 = i - 1;
 	var ip1 = i + 1;
+
 	token = tokens[ i ];
+
+	// Handle a quotation mark token which is immediately preceded by sentence-ending punctuation (`.`, `!`, or `?`)...
+	if (
+		RE_QUOTES.test( token ) &&
+		i > 0 &&
+		( tokens[ im1 ] === '.' || tokens[ im1 ] === '!' || tokens[ im1 ] === '?' )
+	) {
+		// Look ahead to see if sentence continues:
+		ip1 = i + 1;
+		if ( ip1 < tokens.length ) {
+			// Skip spaces...
+			while ( ip1 < tokens.length && tokens[ ip1 ] === ' ' ) {
+				ip1 += 1;
+			}
+			// If next non-space token is lowercase or certain punctuation, sentence continues:
+			if ( ip1 < tokens.length ) {
+				nextToken = tokens[ ip1 ];
+				if ( RE_LOWERCASE.test( nextToken ) ) {
+					return false;
+				}
+			}
+		}
+		return true;
+	}
+
+	// Regular sentence ending punctuation...
 	if (
 		token === '.' &&
 		!RE_CAPITALIZED.test( tokens[ im1 ] ) && // for other short abbreviations and bullet points
@@ -73,12 +103,6 @@ function isEndOfSentence( tokens, i ) {
 	) {
 		return true;
 	}
-	if (
-		RE_SUFFIXES.test( token ) &&
-		( tokens[ im1 ] === '.' || tokens[ im1 ] === '!' || tokens[ im1 ] === '?' )
-	) {
-		return true;
-	}
 	return false;
 }
 
@@ -112,6 +136,7 @@ function sentencize( str ) {
 	var tokens;
 	var out;
 	var i;
+
 	if ( !isString( str ) ) {
 		throw new TypeError( 'invalid argument. Must provide a string. Value: `' + str + '`.' );
 	}
diff --git a/lib/node_modules/@stdlib/nlp/sentencize/test/test.js b/lib/node_modules/@stdlib/nlp/sentencize/test/test.js
index 304534773b85..95e641e770c0 100644
--- a/lib/node_modules/@stdlib/nlp/sentencize/test/test.js
+++ b/lib/node_modules/@stdlib/nlp/sentencize/test/test.js
@@ -317,3 +317,41 @@ tape( 'the function returns an empty array if provided an empty string', functio
 	t.equal( out.length, 0, 'array length is zero' );
 	t.end();
 });
+
+tape( 'the function correctly handles punctuation within quotation marks', function test( t ) {
+	var expected;
+	var actual;
+	var str;
+
+	str = 'I said "Look out" right before he banged his head.';
+	expected = [ 'I said "Look out" right before he banged his head.' ];
+	actual = sentencize( str );
+	t.deepEqual( actual, expected, 'keeps sentence with simple quotes together' );
+
+	str = 'I said "Look out!" right before he banged his head.';
+	expected = [ 'I said "Look out!" right before he banged his head.' ];
+	actual = sentencize( str );
+	t.deepEqual( actual, expected, 'keeps sentence with exclamation in quotes together' );
+
+	str = 'He asked "What time is it?" before leaving.';
+	expected = [ 'He asked "What time is it?" before leaving.' ];
+	actual = sentencize( str );
+	t.deepEqual( actual, expected, 'keeps sentence with question mark in quotes together' );
+
+	str = '"Stop!" he yelled. "We need to think about this."';
+	expected = [ '"Stop!" he yelled.', '"We need to think about this."' ];
+	actual = sentencize( str );
+	t.deepEqual( actual, expected, 'correctly splits multiple quoted sentences' );
+
+	str = 'She said "This is great!" and smiled.';
+	expected = [ 'She said "This is great!" and smiled.' ];
+	actual = sentencize( str );
+	t.deepEqual( actual, expected, 'keeps sentence with exclamation in middle quotes together' );
+
+	str = '"Is this correct?" he wondered. "I think so!" she replied.';
+	expected = [ '"Is this correct?" he wondered.', '"I think so!" she replied.' ];
+	actual = sentencize( str );
+	t.deepEqual( actual, expected, 'correctly handles multiple quoted sentences with different punctuation' );
+
+	t.end();
+});

From 876302ffdff51339ad33aeb58926593d2f1a151c Mon Sep 17 00:00:00 2001
From: Philipp Burckhardt <pburckhardt@outlook.com>
Date: Sat, 22 Feb 2025 17:07:13 -0500
Subject: [PATCH 2/2] chore: update comment

---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: na
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---

---
type: pre_push_report
description: Results of running various checks prior to pushing changes.
report:
  - task: run_javascript_examples
    status: na
  - task: run_c_examples
    status: na
  - task: run_cpp_examples
    status: na
  - task: run_javascript_readme_examples
    status: na
  - task: run_c_benchmarks
    status: na
  - task: run_cpp_benchmarks
    status: na
  - task: run_fortran_benchmarks
    status: na
  - task: run_javascript_benchmarks
    status: na
  - task: run_julia_benchmarks
    status: na
  - task: run_python_benchmarks
    status: na
  - task: run_r_benchmarks
    status: na
  - task: run_javascript_tests
    status: na
---
---
 lib/node_modules/@stdlib/nlp/sentencize/lib/main.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js b/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js
index e03dba300ab7..e41feaf0130a 100644
--- a/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js
+++ b/lib/node_modules/@stdlib/nlp/sentencize/lib/main.js
@@ -73,7 +73,7 @@ function isEndOfSentence( tokens, i ) {
 			while ( ip1 < tokens.length && tokens[ ip1 ] === ' ' ) {
 				ip1 += 1;
 			}
-			// If next non-space token is lowercase or certain punctuation, sentence continues:
+			// If next non-space token is lowercase, we assume the sentence continues:
 			if ( ip1 < tokens.length ) {
 				nextToken = tokens[ ip1 ];
 				if ( RE_LOWERCASE.test( nextToken ) ) {