@@ -294,6 +294,11 @@ private function handle(): self
294294 $ workingCleanString = $ this ->cleanString ;
295295 $ normalizedString = $ this ->stringNormalizer ->normalize ($ workingCleanString );
296296
297+ // Preserve the original normalized string for full-word context lookups.
298+ // Masking replaces characters with *, which breaks word boundaries and can
299+ // cause the pure-alpha-substring check to miss compound profanity.
300+ $ originalNormalized = preg_replace ('/\s+/ ' , ' ' , $ normalizedString );
301+
297302 // Loop through until no more profanities are detected
298303 while ($ continue ) {
299304 $ continue = false ;
@@ -323,6 +328,16 @@ private function handle(): self
323328 // Use boundaries to extract the full word around the match
324329 $ fullWord = $ this ->getFullWordContext ($ normalizedString , $ start , $ length );
325330
331+ // If the match is purely alphabetic and is a substring of a larger
332+ // alphabetic word, it's a legitimate word — not obfuscated profanity
333+ // e.g. "spac" inside "space", "ass" inside "class"
334+ // Use the original unmasked string for context so that masking
335+ // doesn't break compound profanity detection.
336+ $ originalFullWord = $ this ->getFullWordContext ($ originalNormalized , $ start , $ length );
337+ if ($ this ->isPureAlphaSubstring ($ matchedText , $ originalFullWord , $ profanity )) {
338+ continue ;
339+ }
340+
326341 // Check if the full word (in lowercase) is in the false positives list
327342 if ($ this ->profanityDetector ->isFalsePositive ($ fullWord )) {
328343 continue ; // Skip checking this word if it's a false positive
@@ -501,6 +516,76 @@ private function isSpanningWordBoundary(string $matchedText, string $fullString,
501516 return false ;
502517 }
503518
/**
 * Decide whether a match is a purely alphabetic fragment of a longer, purely
 * alphabetic word, which indicates a likely false positive.
 *
 * This catches cases like "spac" inside "space" or "ass" inside "class"
 * without needing to enumerate every false positive word.
 *
 * The method returns false (i.e. the match is NOT treated as a harmless
 * substring) when any of the following holds:
 *  - the match or the surrounding word contains a non-letter character
 *    (obfuscation such as "sp@c" or "s-p-a-c");
 *  - the surrounding word is not longer than the match (standalone word);
 *  - the match is longer than the profanity key (repeated letters such as
 *    "ccuunntt" for "cunt");
 *  - the surrounding word is the match plus a common suffix
 *    (conjugated profanity such as "fuckings");
 *  - the text before or after the match contains another known profanity
 *    of at least three characters (compound profanity such as "cuntfuck").
 *
 * NOTE(review): the match is located inside $fullWord with strpos(), so when
 * the matched text occurs more than once in the word the first occurrence is
 * used, which may not be the occurrence that actually matched.
 *
 * @param string $matchedText  The text that matched the profanity pattern
 * @param string $fullWord     The full word context surrounding the match
 * @param string $profanityKey The base profanity word from the list
 * @return bool True when the match looks like part of a regular word
 */
private function isPureAlphaSubstring(string $matchedText, string $fullWord, string $profanityKey): bool
{
    // \A ... \z rather than ^ ... $: in PCRE "$" also matches just before a
    // trailing newline, which would let "abc\n" pass as purely alphabetic.
    $pureAlpha = '/\A[a-zA-Z]+\z/';

    // Only applies if both the match and its surrounding word are entirely
    // alphabetic (no obfuscation characters).
    if (!preg_match($pureAlpha, $matchedText) || !preg_match($pureAlpha, $fullWord)) {
        return false;
    }

    $matchLength = strlen($matchedText);

    // Not embedded if the word is no longer than the match (standalone word).
    if (strlen($fullWord) <= $matchLength) {
        return false;
    }

    // A match longer than the profanity key contains repeated characters,
    // which is obfuscation rather than a regular word (e.g. "ccuunntt").
    if ($matchLength > strlen($profanityKey)) {
        return false;
    }

    $matchLower = strtolower($matchedText);
    $wordLower = strtolower($fullWord);

    // The word is the profanity plus a common suffix, e.g. "fuckings" =
    // "fucking" + "s": conjugated profanity, not a false positive.
    $suffixes = ['s', 'es', 'ed', 'er', 'ers', 'est', 'ing', 'ings', 'ly', 'y'];

    foreach ($suffixes as $suffix) {
        if ($wordLower === $matchLower . $suffix) {
            return false;
        }
    }

    // Compound profanity: look for another known profanity in the text
    // before and after the match. The two sides are checked separately,
    // because gluing them together can fabricate a profanity that never
    // appears in the word (e.g. "ass" in "cassock" would leave "c" . "ock").
    $pos = strpos($wordLower, $matchLower);
    if ($pos !== false) {
        // Cast: substr() may return false on PHP < 8 when the offset equals
        // the string length.
        $before = (string) substr($wordLower, 0, $pos);
        $after = (string) substr($wordLower, $pos + $matchLength);

        foreach ($this->profanityDetector->getProfanityExpressions() as $profanity => $_) {
            // Numeric-string array keys are handed back as integers.
            $needle = (string) $profanity;

            // Very short entries would match inside almost any word.
            if (strlen($needle) < 3) {
                continue;
            }

            if (stripos($before, $needle) !== false || stripos($after, $needle) !== false) {
                return false;
            }
        }
    }

    // The match is embedded in a larger regular word (e.g. "spac" in "space").
    return true;
}
588+
504589 /**
505590 * Get the full word context surrounding the matched profanity.
506591 *
0 commit comments