diff --git a/README.md b/README.md index 11def20..1920589 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,24 @@ var result = helper.GenerateSlug("Simple,short&quick Example"); Console.WriteLine(result); // Simple-short-quick-Example ``` +To enable hash-based shortening for unique truncated slugs: + +```csharp +var config = new SlugHelperConfiguration +{ + MaximumLength = 12, + EnableHashedShortening = true, + HashLength = 4 // Use 4-character hash for better collision resistance +}; + +var helper = new SlugHelper(config); + +// These will produce different results despite similar input (hash digits shown are illustrative) +Console.WriteLine(helper.GenerateSlug("The very long name liga")); // e.g. "the-ver-2a4b" +Console.WriteLine(helper.GenerateSlug("The very long name liga (W)")); // e.g. "the-ver-8f3c" +Console.WriteLine(helper.GenerateSlug("The very long name liga (M)")); // e.g. "the-ver-d1e7" +``` + The following options can be configured with the `SlugHelperConfiguration`: ### `ForceLowerCase` @@ -214,4 +232,39 @@ Specifying the `DeniedCharactersRegex` option will disable the character removal This will limit the length of the generated slug to be a maximum of the number of chars given by the parameter. If the truncation happens in a way that a trailing `-` is left, it will be removed. -- Default value: `null` \ No newline at end of file +- Default value: `null` + +### `EnableHashedShortening` + +When enabled, slugs that exceed `MaximumLength` will be shortened and given a hash postfix to keep truncated slugs distinct. The hash postfix is derived from the full slug before truncation using a deterministic FNV-1a hash algorithm. This makes it unlikely, though not impossible, for different inputs to produce identical shortened slugs. + +For example, when `MaximumLength` is 12: +- `"The very long name liga"` becomes `"the-very-54"` (instead of `"the-very-lon"`) +- `"The very long name liga (W)"` becomes `"the-very-a2"` (instead of `"the-very-lon"`) + +The hash postfix format is `"-XX"` where `XX` is a lowercase hexadecimal hash. 
The hash length can be configured using `HashLength`. If `MaximumLength` is too small to accommodate the hash postfix, it will fall back to simple truncation. + +- Default value: `false` + +### `HashLength` + +Controls the length of the hash postfix when `EnableHashedShortening` is enabled. Valid values are 2-6 characters; values outside this range are clamped. Higher values provide better collision resistance but use more characters from the maximum length. + +- 2 characters: 256 possible values (good for small sets) +- 4 characters: 65,536 possible values (recommended for most use cases) +- 6 characters: 16,777,216 possible values (best collision resistance) + +For example: +```csharp +var config = new SlugHelperConfiguration +{ + MaximumLength = 15, + EnableHashedShortening = true, + HashLength = 4 // Use 4-character hash for better collision resistance +}; + +var helper = new SlugHelper(config); +Console.WriteLine(helper.GenerateSlug("The very long name liga")); // e.g. "the-very-l-2a4b" +``` + +- Default value: `2` \ No newline at end of file diff --git a/src/Slugify.Core/SlugHelper.cs b/src/Slugify.Core/SlugHelper.cs index 136090a..56fbada 100644 --- a/src/Slugify.Core/SlugHelper.cs +++ b/src/Slugify.Core/SlugHelper.cs @@ -88,15 +88,95 @@ public virtual string GenerateSlug(string inputString) if (Config.MaximumLength.HasValue && sb.Length > Config.MaximumLength.Value) { - sb.Remove(Config.MaximumLength.Value, sb.Length - Config.MaximumLength.Value); - // Remove trailing dash if it exists - if (sb.Length > 0 && sb[sb.Length - 1] == '-') + if (Config.EnableHashedShortening) { - sb.Remove(sb.Length - 1, 1); + // Generate hash from the full slug before truncation + var fullSlug = sb.ToString(); + // Clamp here as well so the room reserved below always matches the length of the emitted hash (2-6) + var hashLength = Math.Max(2, Math.Min(6, Config.HashLength)); + var hash = GenerateSlugHash(fullSlug, hashLength); + + // Calculate target length leaving room for hash and separator + var hashWithSeparator = hashLength + 1; // +1 for the dash + var targetLength = Config.MaximumLength.Value - hashWithSeparator; + if (targetLength < 1) + { + // If maximum length is 
too small for hash postfix, just truncate normally + sb.Remove(Config.MaximumLength.Value, sb.Length - Config.MaximumLength.Value); + // Remove trailing dash if it exists, exactly like plain truncation does + if (sb.Length > 0 && sb[sb.Length - 1] == '-') + { + sb.Remove(sb.Length - 1, 1); + } + } + else + { + // Truncate to make room for hash + sb.Remove(targetLength, sb.Length - targetLength); + + // Remove every trailing dash so the separator appended below is never doubled + while (sb.Length > 0 && sb[sb.Length - 1] == '-') + { + sb.Remove(sb.Length - 1, 1); + } + + // Append hash postfix + sb.Append('-'); + sb.Append(hash); + } + } + else + { + // Original behavior: simple truncation + sb.Remove(Config.MaximumLength.Value, sb.Length - Config.MaximumLength.Value); + // Remove trailing dash if it exists + if (sb.Length > 0 && sb[sb.Length - 1] == '-') + { + sb.Remove(sb.Length - 1, 1); + } } } return sb.ToString(); } + /// <summary> + /// Generates a deterministic hash from the input string for use as a postfix. + /// Uses the 32-bit FNV-1a algorithm over the UTF-8 bytes of the input; the result does not depend on machine byte order. + /// </summary> + /// <param name="input">The input string to hash</param> + /// <param name="length">The desired length of the hash; clamped to 2-6 characters</param> + /// <returns>A lowercase hexadecimal hash of the (clamped) length</returns> + private static string GenerateSlugHash(string input, int length) + { + // Clamp length to valid range + length = Math.Max(2, Math.Min(6, length)); + + // FNV-1a hash algorithm constants + const uint FnvOffsetBasis = 2166136261; + const uint FnvPrime = 16777619; + + // Calculate FNV-1a hash + uint hash = FnvOffsetBasis; + var bytes = Encoding.UTF8.GetBytes(input); + + foreach (byte b in bytes) + { + hash ^= b; + hash = unchecked(hash * FnvPrime); // wrap-around is part of FNV; must not throw in checked builds + } + + // Convert to hex string of desired length + var hashBytes = BitConverter.GetBytes(hash); + if (!BitConverter.IsLittleEndian) + { + // GetBytes uses the machine byte order; normalise to little-endian so every platform emits the same digits + Array.Reverse(hashBytes); + } + var hexString = BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant(); + + // Take the first 'length' characters for the hash + return hexString.Substring(0, length); + } + } \ No newline at end of file diff --git a/src/Slugify.Core/SlugHelperConfiguration.cs b/src/Slugify.Core/SlugHelperConfiguration.cs index 3874aef..5707087 100644 --- 
a/src/Slugify.Core/SlugHelperConfiguration.cs +++ b/src/Slugify.Core/SlugHelperConfiguration.cs @@ -66,6 +66,20 @@ public class SlugHelperConfiguration /// </summary> public int? MaximumLength { get; set; } + /// <summary> + /// When enabled, slugs that exceed MaximumLength are shortened and given a hash postfix so that different long inputs are unlikely to collapse into the same slug. + /// The hash postfix is a suffix derived from the full slug before truncation. + /// Defaults to false for backward compatibility. + /// </summary> + public bool EnableHashedShortening { get; set; } + + /// <summary> + /// Length of the hash postfix when EnableHashedShortening is true. + /// Valid values are 2-6 characters; the hash generator clamps values outside that range. Defaults to 2. + /// Higher values provide better collision resistance but use more characters. + /// </summary> + public int HashLength { get; set; } = 2; + /// <summary> /// Enable non-ASCII languages support. Defaults to false /// </summary> diff --git a/tests/Slugify.Core.Tests/SlugHelperTests.cs b/tests/Slugify.Core.Tests/SlugHelperTests.cs index e711ca9..710afe6 100644 --- a/tests/Slugify.Core.Tests/SlugHelperTests.cs +++ b/tests/Slugify.Core.Tests/SlugHelperTests.cs @@ -1,5 +1,7 @@ using System; +using System.Collections.Generic; using System.Globalization; +using System.Linq; using Xunit; @@ -797,6 +799,562 @@ public void MaximumLengthGivenTrimsUnnecessaryChars(int? 
length, string input, s Assert.Equal(expected, helper.GenerateSlug(input)); } + [Theory] + [InlineData("The very long name liga", 12)] + [InlineData("The very long name liga (W)", 12)] + [InlineData("The very long name liga (M)", 12)] + [InlineData("abcdefghijklmnopqrstuvwxy", 15)] + public void EnableHashedShorteningCreatesUniqueResults(string input, int maxLength) + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = maxLength, + EnableHashedShortening = true + }); + + var result = helper.GenerateSlug(input); + + // Should be within the maximum length + Assert.True(result.Length <= maxLength); + + // Should end with a dash followed by 2 hex characters (hash) + Assert.True(result.Length >= 4); // At least "X-YZ" + Assert.Equal('-', result[result.Length - 3]); + + // Last 2 characters should be valid hex + var hash = result.Substring(result.Length - 2); + Assert.True(hash.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))); + } + + [Fact] + public void EnableHashedShorteningCreatesDifferentHashesForDifferentInputs() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 12, + EnableHashedShortening = true + }); + + var result1 = helper.GenerateSlug("The very long name liga"); + var result2 = helper.GenerateSlug("The very long name liga (W)"); + var result3 = helper.GenerateSlug("The very long name liga (M)"); + + // All should be different + Assert.NotEqual(result1, result2); + Assert.NotEqual(result1, result3); + Assert.NotEqual(result2, result3); + + // All should have different hash suffixes + var hash1 = result1.Substring(result1.Length - 2); + var hash2 = result2.Substring(result2.Length - 2); + var hash3 = result3.Substring(result3.Length - 2); + + Assert.NotEqual(hash1, hash2); + Assert.NotEqual(hash1, hash3); + Assert.NotEqual(hash2, hash3); + } + + [Fact] + public void EnableHashedShorteningWithTooShortMaxLengthFallsBackToTruncation() + { + var helper = Create(new SlugHelperConfiguration() + { + 
MaximumLength = 3, + EnableHashedShortening = true + }); + + var result = helper.GenerateSlug("test input"); + + // Should fall back to simple truncation + Assert.Equal(3, result.Length); + Assert.Equal("tes", result); + } + + [Fact] + public void EnableHashedShorteningWithNoTruncationNeededBehavesNormally() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 50, + EnableHashedShortening = true + }); + + var result = helper.GenerateSlug("short"); + + // Should not add hash if no truncation is needed + Assert.Equal("short", result); + } + + [Fact] + public void HashedShorteningIsDisabledByDefault() + { + var config = new SlugHelperConfiguration(); + Assert.False(config.EnableHashedShortening); + } + + [Fact] + public void HashLengthDefaultsToTwo() + { + var config = new SlugHelperConfiguration(); + Assert.Equal(2, config.HashLength); + } + + [Fact] + public void EnableHashedShorteningWithCustomHashLength() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true, + HashLength = 4 + }); + + var result = helper.GenerateSlug("The very long name that needs truncation"); + + // Should be within the maximum length + Assert.True(result.Length <= 15); + + // Should end with a dash followed by 4 hex characters + Assert.Equal('-', result[result.Length - 5]); + var hash = result.Substring(result.Length - 4); + Assert.Equal(4, hash.Length); + Assert.True(hash.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))); + } + + [Fact] + public void EnableHashedShorteningWithHashLengthSix() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 20, + EnableHashedShortening = true, + HashLength = 6 + }); + + var result = helper.GenerateSlug("The very long name that needs truncation"); + + // Should be within the maximum length + Assert.True(result.Length <= 20); + + // Should end with a dash followed by 6 hex characters + Assert.Equal('-', result[result.Length - 7]); + var hash 
= result.Substring(result.Length - 6); + Assert.Equal(6, hash.Length); + Assert.True(hash.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))); + } + + [Fact] + public void EnableHashedShorteningHashLengthIsClamped() + { + // Test that hash length is clamped to valid range (2-6) + var helperTooSmall = Create(new SlugHelperConfiguration() + { + MaximumLength = 10, + EnableHashedShortening = true, + HashLength = 1 // Should be clamped to 2 + }); + + var resultTooSmall = helperTooSmall.GenerateSlug("test input for hashing"); + var hashTooSmall = resultTooSmall.Substring(resultTooSmall.Length - 2); + Assert.Equal(2, hashTooSmall.Length); + + var helperTooBig = Create(new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true, + HashLength = 10 // Should be clamped to 6 + }); + + var resultTooBig = helperTooBig.GenerateSlug("test input for hashing"); + var hashTooBig = resultTooBig.Substring(resultTooBig.Length - 6); + Assert.Equal(6, hashTooBig.Length); + } + + [Fact] + public void EnableHashedShorteningImprovedCollisionResistance() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true, + HashLength = 4 // Using 4 chars for better collision resistance + }); + + // Generate many different inputs with similar prefixes + var inputs = new[] + { + "The very long name liga", + "The very long name liga (W)", + "The very long name liga (M)", + "The very long name liga (L)", + "The very long name liga (XL)", + "The very long name liga (XXL)", + "The very long name liga Black", + "The very long name liga White", + "The very long name liga Red", + "The very long name liga Blue", + "The very long name liga Green", + "The very long name liga Yellow", + "The very long name liga Orange", + "The very long name liga Purple", + "The very long name liga Pink", + "The very long name liga Brown", + "The very long name liga Gray", + "The very long name liga Silver", + "The very long name liga 
Gold", + "The very long name liga Platinum" + }; + + var results = new HashSet<string>(); + var hashes = new HashSet<string>(); + + foreach (var input in inputs) + { + var result = helper.GenerateSlug(input); + var hash = result.Substring(result.Length - 4); + + // Each result should be unique + Assert.True(results.Add(result), $"Duplicate result found: {result}"); + + // Each hash should be unique (with 4 chars we have 65536 possibilities) + Assert.True(hashes.Add(hash), $"Hash collision found: {hash} for input: {input}"); + + // Verify result is within length limit + Assert.True(result.Length <= 15); + + // Verify hash format + Assert.True(hash.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))); + } + + // With 20 inputs and 4-char hash (65536 possibilities), we should have no collisions + Assert.Equal(inputs.Length, results.Count); + Assert.Equal(inputs.Length, hashes.Count); + } + + [Fact] + public void EnableHashedShorteningDeterministicAcrossPlatforms() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 12, + EnableHashedShortening = true, + HashLength = 4 + }); + + // Test that the same input produces the same hash consistently + var input = "The very long name that needs truncation"; + var result1 = helper.GenerateSlug(input); + var result2 = helper.GenerateSlug(input); + + Assert.Equal(result1, result2); + + // Extract hash parts + var hash1 = result1.Substring(result1.Length - 4); + var hash2 = result2.Substring(result2.Length - 4); + + Assert.Equal(hash1, hash2); + } + + [Fact] + public void EnableHashedShorteningWithForceLowerCaseFalse() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 12, + EnableHashedShortening = true, + ForceLowerCase = false + }); + + var result1 = helper.GenerateSlug("The Very Long Name Liga"); + var result2 = helper.GenerateSlug("The Very Long Name Liga (W)"); + + // Should preserve case in the truncated part but still add hash + Assert.True(result1.Length <= 12); + 
Assert.True(result2.Length <= 12); + Assert.NotEqual(result1, result2); + + // Both should end with hash pattern + Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', result2[result2.Length - 3]); + + // Hash should be different + var hash1 = result1.Substring(result1.Length - 2); + var hash2 = result2.Substring(result2.Length - 2); + Assert.NotEqual(hash1, hash2); + } + + [Fact] + public void EnableHashedShorteningWithCustomStringReplacements() + { + var config = new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true + }; + config.StringReplacements.Add("&", "and"); + config.StringReplacements.Add("@", "at"); + + var helper = Create(config); + + var result1 = helper.GenerateSlug("Company & Partners @ Location"); + var result2 = helper.GenerateSlug("Company & Partners @ Different Location"); + + // Should apply replacements before hashing and truncation + Assert.True(result1.Length <= 15); + Assert.True(result2.Length <= 15); + Assert.NotEqual(result1, result2); + + // Regenerating the same input must give the same slug (the hash is deterministic) + Assert.Equal(result1, helper.GenerateSlug("Company & Partners @ Location")); + Assert.Equal(result2, helper.GenerateSlug("Company & Partners @ Different Location")); + + // Hash should be calculated after replacements + Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', result2[result2.Length - 3]); + } + + [Fact] + public void EnableHashedShorteningWithCollapseDashesFalse() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 12, + EnableHashedShortening = true, + CollapseDashes = false + }); + + var result = helper.GenerateSlug("word & another word"); + + // Should preserve multiple dashes but still truncate with hash + Assert.True(result.Length <= 12); + Assert.Equal('-', result[result.Length - 3]); + + // Last 2 characters should be valid hex + var hash = result.Substring(result.Length - 2); + Assert.True(hash.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 
'f'))); + } + + [Fact] + public void EnableHashedShorteningWithTrimWhitespaceFalse() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true, + TrimWhitespace = false + }); + + var result1 = helper.GenerateSlug(" long text with spaces "); + var result2 = helper.GenerateSlug("long text with spaces"); + + // Should treat differently due to leading/trailing spaces + Assert.True(result1.Length <= 15); + Assert.True(result2.Length <= 15); + Assert.NotEqual(result1, result2); + + // Both should have hash postfix + Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', result2[result2.Length - 3]); + } + + [Fact] + public void EnableHashedShorteningWithDeniedCharactersRegex() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 12, + EnableHashedShortening = true, + DeniedCharactersRegex = new System.Text.RegularExpressions.Regex(@"[^a-zA-Z0-9\-]") + }); + + var result1 = helper.GenerateSlug("test.with_special@chars"); + var result2 = helper.GenerateSlug("test with special chars"); + + // Should apply regex filtering before hashing + Assert.True(result1.Length <= 12); + Assert.True(result2.Length <= 12); + Assert.NotEqual(result1, result2); + + // Should end with hash + Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', result2[result2.Length - 3]); + } + + [Fact] + public void EnableHashedShorteningWithNonAsciiLanguages() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true, + SupportNonAsciiLanguages = true + }); + + var result1 = helper.GenerateSlug("很长的中文文本需要被截断"); + var result2 = helper.GenerateSlug("很长的中文文本需要被截断和处理"); + + // Should handle non-ASCII characters and create unique hashes + Assert.True(result1.Length <= 15); + Assert.True(result2.Length <= 15); + Assert.NotEqual(result1, result2); + + // Should end with hash + Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', 
result2[result2.Length - 3]); + + // Hash should be different + var hash1 = result1.Substring(result1.Length - 2); + var hash2 = result2.Substring(result2.Length - 2); + Assert.NotEqual(hash1, hash2); + } + + [Fact] + public void EnableHashedShorteningWithModifiedAllowedCharacters() + { + var config = new SlugHelperConfiguration() + { + MaximumLength = 12, + EnableHashedShortening = true + }; + + // Add some custom allowed characters + config.AllowedCharacters.Add('!'); + config.AllowedCharacters.Add('*'); + // Remove some default ones + config.AllowedCharacters.Remove('.'); + config.AllowedCharacters.Remove('_'); + + var helper = Create(config); + + var result1 = helper.GenerateSlug("test.with_special!chars*"); + var result2 = helper.GenerateSlug("test with special!chars*"); + + // Should respect modified allowed characters + Assert.True(result1.Length <= 12); + Assert.True(result2.Length <= 12); + Assert.NotEqual(result1, result2); + + // Should end with hash + Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', result2[result2.Length - 3]); + + // Should contain allowed special chars but not denied ones + Assert.DoesNotContain(".", result1); + Assert.DoesNotContain("_", result1); + } + + [Fact] + public void EnableHashedShorteningComplexCombination() + { + var config = new SlugHelperConfiguration() + { + MaximumLength = 20, + EnableHashedShortening = true, + ForceLowerCase = false, + CollapseDashes = false, + TrimWhitespace = true, + SupportNonAsciiLanguages = true + }; + + config.StringReplacements.Add("&", " AND "); + config.StringReplacements.Add("@", " AT "); + + var helper = Create(config); + + var result1 = helper.GenerateSlug(" Company & Associates @ München "); + var result2 = helper.GenerateSlug(" Company & Associates @ Berlin "); + + // Should apply all transformations and create unique hashes + Assert.True(result1.Length <= 20); + Assert.True(result2.Length <= 20); + Assert.NotEqual(result1, result2); + + // Should end with hash + 
Assert.Equal('-', result1[result1.Length - 3]); + Assert.Equal('-', result2[result2.Length - 3]); + + // Hash should be different + var hash1 = result1.Substring(result1.Length - 2); + var hash2 = result2.Substring(result2.Length - 2); + Assert.NotEqual(hash1, hash2); + } + + [Fact] + public void EnableHashedShorteningWithEdgeCaseInputs() + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 10, + EnableHashedShortening = true + }); + + // Test with input that results in mostly dashes + var result1 = helper.GenerateSlug("!@#$%^&*() test !@#$%^&*()"); + var result2 = helper.GenerateSlug("!@#$%^&*() different !@#$%^&*()"); + + // Should handle edge cases gracefully + Assert.True(result1.Length <= 10); + Assert.True(result2.Length <= 10); + Assert.NotEqual(result1, result2); + + // Only check hash pattern if result is at max length (meaning it was truncated) + if (result1.Length == 10) + { + Assert.Equal('-', result1[result1.Length - 3]); + } + if (result2.Length == 10) + { + Assert.Equal('-', result2[result2.Length - 3]); + } + } + + [Theory] + [InlineData("Test & Co. @ Location #1", "Test & Co. 
@ Location #2")] + [InlineData("München Straße 123", "München Straße 456")] + [InlineData("Company &&& Name", "Company &&& Different")] + public void EnableHashedShorteningConsistentWithVariousInputs(string input1, string input2) + { + var helper = Create(new SlugHelperConfiguration() + { + MaximumLength = 15, + EnableHashedShortening = true, + SupportNonAsciiLanguages = true + }); + + var result1 = helper.GenerateSlug(input1); + var result2 = helper.GenerateSlug(input2); + + // Should create different results for different inputs + Assert.NotEqual(result1, result2); + + // Both should be within length limit + Assert.True(result1.Length <= 15); + Assert.True(result2.Length <= 15); + + // Both should have hash postfix if truncated + if (result1.Length == 15) + { + Assert.Equal('-', result1[result1.Length - 3]); + var hash1 = result1.Substring(result1.Length - 2); + Assert.True(hash1.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))); + } + + if (result2.Length == 15) + { + Assert.Equal('-', result2[result2.Length - 3]); + var hash2 = result2.Substring(result2.Length - 2); + Assert.True(hash2.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))); + } + + // If both are at max length, hashes should be different + if (result1.Length == 15 && result2.Length == 15) + { + var hash1 = result1.Substring(result1.Length - 2); + var hash2 = result2.Substring(result2.Length - 2); + Assert.NotEqual(hash1, hash2); + } + } + [Fact] public void TestsInTheReadme() {