Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions packages/content-common/src/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,90 @@ describe('content-common', () => {
expect(applyApTitleCase(result)).toEqual(expected);
});
});

it('should capitalize "a" and "the" after sentence-ending punctuation', () => {
  // Each entry is [headline passed to applyApTitleCase, output it must produce].
  // The inputs place "a"/"the" right after ".", "!", "?" and after a closing
  // double quote that follows a period.
  const cases: ReadonlyArray<readonly [string, string]> = [
    [
      'Nazi Persecution Scattered My Family. a Lost Archive Brought Us Together',
      'Nazi Persecution Scattered My Family. A Lost Archive Brought Us Together',
    ],
    [
      'This is the end! the beginning starts now',
      'This Is the End! The Beginning Starts Now',
    ],
    ['What happened? a miracle occurred', 'What Happened? A Miracle Occurred'],
    [
      'She said "Hello." the crowd cheered',
      'She Said "Hello." The Crowd Cheered',
    ],
  ];

  for (const [headline, titleCased] of cases) {
    expect(applyApTitleCase(headline)).toEqual(titleCased);
  }
});

it('should always format iPhone correctly', () => {
  // Each entry is [headline passed to applyApTitleCase, output it must produce].
  // The inputs spell the word as "Iphone", "IPHONE" and "iphone".
  const cases: ReadonlyArray<readonly [string, string]> = [
    ['the new Iphone is amazing', 'The New iPhone Is Amazing'],
    ['IPHONE users love their devices', 'iPhone Users Love Their Devices'],
    ['my iphone broke yesterday', 'My iPhone Broke Yesterday'],
  ];

  for (const [headline, titleCased] of cases) {
    expect(applyApTitleCase(headline)).toEqual(titleCased);
  }
});

it('should always lowercase "vs."', () => {
  // Each entry is [headline passed to applyApTitleCase, output it must produce].
  // The inputs use "Vs.", "VS." and a bare "vs" without the trailing period;
  // every expected output contains "vs." with the period.
  const cases: ReadonlyArray<readonly [string, string]> = [
    [
      'Apple Vs. Samsung: the battle continues',
      'Apple vs. Samsung: The Battle Continues',
    ],
    ['Batman VS. Superman was a movie', 'Batman vs. Superman Was a Movie'],
    ['Good vs Evil: a timeless struggle', 'Good vs. Evil: A Timeless Struggle'],
  ];

  for (const [headline, titleCased] of cases) {
    expect(applyApTitleCase(headline)).toEqual(titleCased);
  }
});

it('should not capitalize "as" in title case', () => {
  // Each entry is [headline passed to applyApTitleCase, output it must produce].
  // In the second entry "As" is the first word and stays capitalized in the
  // expected output; in the other two it is mid-headline and is lowercased.
  const cases: ReadonlyArray<readonly [string, string]> = [
    ['Working As a Team Is Important', 'Working as a Team Is Important'],
    ['As The Sun Sets', 'As the Sun Sets'],
    ['She Sees It As An Opportunity', 'She Sees It as an Opportunity'],
  ];

  for (const [headline, titleCased] of cases) {
    expect(applyApTitleCase(headline)).toEqual(titleCased);
  }
});
});
describe('lowercaseAfterApostrophe', () => {
it('lowercase letter after apostrophe & return new string', () => {
Expand Down
54 changes: 50 additions & 4 deletions packages/content-common/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ export const sanitizeText = (input: string, maxLength: number): string => {
export const lowercaseAfterApostrophe = (input: string): string => {
  // Returns a new string in which the character following an apostrophe is
  // lowercased, but only when that apostrophe sits inside a word.
  //
  // The pattern matches an ASCII (') or curly (\u2018 / \u2019) apostrophe that
  // is preceded by a word character (look-behind, so that character is not
  // consumed) and followed by a word character. `\w` covers ASCII letters,
  // digits and underscore; toLowerCase() leaves digits and underscore as-is.
  const regex = /(?<=\w)(['\u2018\u2019])(\w)/g;

  // Single return: the apostrophe is re-emitted unchanged, only the captured
  // following character is lowercased.
  return input.replace(
    regex,
    (_, apostrophe: string, letter: string) =>
      `${apostrophe}${letter.toLowerCase()}`,
  );
};

/**
Expand Down Expand Up @@ -89,22 +92,65 @@ export const applyApTitleCase = (value: string): string => {

const result = allWords
.map((word, index, all) => {
// Check if the previous non-empty element is a sentence-ending punctuation
const isAfterSentenceEnd =
index > 0 &&
(() => {
// Look for the previous non-empty element
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Look for the previous non-empty element
// Look for the previous non-whitespace character

for (let i = index - 1; i >= 0; i--) {
const prev = all[i].trim();
if (prev) {
// Check if it ends with sentence-ending punctuation
return (
/[.!?]$/.test(prev) ||
// Or if it's a closing quote after sentence-ending punctuation
(i > 0 &&
(prev === '"' ||
prev === '\u201D' ||
prev === "'" ||
prev === '\u2019') &&
/[.!?]$/.test(all[i - 1].trim()))
);
}
}
return false;
})();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not totally sure, but I think this for loop might be replaced with a regex. Here's what ChatGPT suggests:

// look-behind version (Node 20+)
const SENT_START = /(?<=[.!?]\s*["""'']*\s*)(?:a|the)\b/gi;

text = text.replace(
  SENT_START,
  w => w[0].toUpperCase() + w.slice(1)
);

We may also want to cover cases where the quote is after the punctuation:

The crowd cheered. "Hello," she said.


const isAfterColon = index > 0 && all[index - 1].trim() === ':';

const isAfterQuote =
index > 0 &&
(allWords[index - 1] === "'" ||
allWords[index - 1] === '"' ||
allWords[index - 1] === '\u2018' || // Opening single quote ’
allWords[index - 1] === '\u201C'); // Opening double quote “
allWords[index - 1] === '\u2018' || // Opening single quote '
allWords[index - 1] === '\u201C'); // Opening double quote "

// Special case handling for specific words
const lowerWord = word.toLowerCase();

// Handle iPhone
if (lowerWord === 'iphone') {
return 'iPhone';
}

// Handle vs.
if (lowerWord === 'vs.' || lowerWord === 'vs') {
return 'vs.';
}

// Check if we should capitalize this word
if (
index === 0 || // first word
index === all.length - 1 || // last word
isAfterColon || // capitalize the first word after a colon
isAfterQuote || // capitalize the first word after a quote
!stop.includes(word.toLowerCase()) // not a stop word
isAfterSentenceEnd || // capitalize after sentence-ending punctuation
(!stop.includes(lowerWord) && lowerWord !== 'as') // not a stop word and not 'as'
) {
// Special handling for 'a' and 'the' after sentence-ending punctuation
if (isAfterSentenceEnd && (lowerWord === 'a' || lowerWord === 'the')) {
return capitalize(word);
}
return capitalize(word);
}

Expand Down