From 9a0b5f5a35b7e1075fb31d6b14a84b5561529f10 Mon Sep 17 00:00:00 2001 From: Duc Thanh Nguyen Date: Tue, 22 Oct 2024 00:20:57 +0700 Subject: [PATCH 1/5] Recommender Systems by User-based Collaborative Filtering --- .../CollaborativeFilteringTests.cs | 76 ++++++++++++++++++ .../CollaborativeFiltering.cs | 80 +++++++++++++++++++ README.md | 2 + 3 files changed, 158 insertions(+) create mode 100644 Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs create mode 100644 Algorithms/RecommenderSystem/CollaborativeFiltering.cs diff --git a/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs b/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs new file mode 100644 index 00000000..b46dae92 --- /dev/null +++ b/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs @@ -0,0 +1,76 @@ +using Algorithms.RecommenderSystem; +using NUnit.Framework; +using System.Collections.Generic; + +namespace Algorithms.Tests.RecommenderSystem +{ + [TestFixture] + public class CollaborativeFilteringTests + { + private CollaborativeFiltering recommender = new(); + private Dictionary> testRatings = null!; + + [SetUp] + public void Setup() + { + recommender = new CollaborativeFiltering(); + + testRatings = new Dictionary> + { + ["user1"] = new() + { + ["item1"] = 5.0, + ["item2"] = 3.0, + ["item3"] = 4.0 + }, + ["user2"] = new() + { + ["item1"] = 4.0, + ["item2"] = 2.0, + ["item3"] = 5.0 + }, + ["user3"] = new() + { + ["item1"] = 3.0, + ["item2"] = 4.0, + ["item4"] = 3.0 + } + }; + } + + [Test] + [TestCase("item1", 4.0, 5.0)] + [TestCase("item2", 2.0, 4.0)] + public void CalculateSimilarity_WithValidInputs_ReturnsExpectedResults( + string commonItem, + double rating1, + double rating2) + { + var user1Ratings = new Dictionary { [commonItem] = rating1 }; + var user2Ratings = new Dictionary { [commonItem] = rating2 }; + + var similarity = recommender.CalculateSimilarity(user1Ratings, user2Ratings); + + Assert.That(similarity, Is.InRange(-1.0, 1.0)); + } + + [Test] + public void CalculateSimilarity_WithNoCommonItems_ReturnsZero() + { + var user1Ratings = new Dictionary { ["item1"] = 5.0 }; + var user2Ratings = new Dictionary { ["item2"] = 4.0 }; + + var similarity = recommender.CalculateSimilarity(user1Ratings, user2Ratings); + + Assert.That(similarity, Is.EqualTo(0)); + } + + [Test] + public void PredictRating_WithNonexistentItem_ReturnsZero() + { + var predictedRating = recommender.PredictRating("nonexistentItem", "user1", testRatings); + + Assert.That(predictedRating, Is.EqualTo(0)); + } + } +} diff --git a/Algorithms/RecommenderSystem/CollaborativeFiltering.cs b/Algorithms/RecommenderSystem/CollaborativeFiltering.cs new file mode 100644 index 00000000..cb5b5bd1 --- /dev/null +++ b/Algorithms/RecommenderSystem/CollaborativeFiltering.cs @@ -0,0 +1,80 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Algorithms.RecommenderSystem +{ + public class CollaborativeFiltering + { + /// + /// Method to calculate similarity between two users using Pearson correlation. + /// + /// Rating of User 1. + /// Rating of User 2. + /// double value to reflect the index of similarity between two users. + public double CalculateSimilarity(Dictionary user1Ratings, Dictionary user2Ratings) + { + var commonItems = user1Ratings.Keys.Intersect(user2Ratings.Keys).ToList(); + if (commonItems.Count == 0) + { + return 0; + } + + var user1Scores = commonItems.Select(item => user1Ratings[item]).ToArray(); + var user2Scores = commonItems.Select(item => user2Ratings[item]).ToArray(); + + var avgUser1 = user1Scores.Average(); + var avgUser2 = user2Scores.Average(); + + double numerator = 0; + double sumSquare1 = 0; + double sumSquare2 = 0; + + for (var i = 0; i < commonItems.Count; i++) + { + var diff1 = user1Scores[i] - avgUser1; + var diff2 = user2Scores[i] - avgUser2; + + numerator += diff1 * diff2; + sumSquare1 += diff1 * diff1; + sumSquare2 += diff2 * diff2; + } + + var denominator = Math.Sqrt(sumSquare1 * sumSquare2); + return denominator == 0 ? 0 : numerator / denominator; + } + + /// + /// Predict a rating for a specific item by a target user. + /// + /// The item for which the rating needs to be predicted. + /// The user for whom the rating is being predicted. + /// + /// A dictionary containing user ratings where: + /// - The key is the user's identifier (string). + /// - The value is another dictionary where the key is the item identifier (string), and the value is the rating given by the user (double). + /// + /// The predicted rating for the target item by the target user. + /// If there is insufficient data to predict a rating, the method returns 0. + /// + public double PredictRating(string targetItem, string targetUser, Dictionary> ratings) + { + var targetUserRatings = ratings[targetUser]; + double totalSimilarity = 0; + double weightedSum = 0; + + foreach (var otherUser in ratings.Keys.Where(u => u != targetUser)) + { + var otherUserRatings = ratings[otherUser]; + if (otherUserRatings.ContainsKey(targetItem)) + { + var similarity = CalculateSimilarity(targetUserRatings, otherUserRatings); + totalSimilarity += Math.Abs(similarity); + weightedSum += similarity * otherUserRatings[targetItem]; + } + } + + return totalSimilarity == 0 ? 0 : weightedSum / totalSimilarity; + } + } +} diff --git a/README.md b/README.md index f13516de..cefdef6b 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,8 @@ find more than one implementation for the same objective but using different alg * [Josephus Problem](./Algorithms/Numeric/JosephusProblem.cs) * [Newton's Square Root Calculation](./Algorithms/NewtonSquareRoot.cs) * [SoftMax Function](./Algorithms/Numeric/SoftMax.cs) + * [RecommenderSystem](./Algorithms/RecommenderSystem) + * [CollaborativeFiltering](./Algorithms/RecommenderSystem/CollaborativeFiltering) * [Searches](./Algorithms/Search) * [A-Star](./Algorithms/Search/AStar/) * [Binary Search](./Algorithms/Search/BinarySearcher.cs) From d8b51719c26a46da6bfe49ecf11090c637258d7f Mon Sep 17 00:00:00 2001 From: Duc Thanh Nguyen Date: Tue, 22 Oct 2024 00:36:19 +0700 Subject: [PATCH 2/5] To handle floating-point precision issues in C# --- Algorithms/RecommenderSystem/CollaborativeFiltering.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Algorithms/RecommenderSystem/CollaborativeFiltering.cs b/Algorithms/RecommenderSystem/CollaborativeFiltering.cs index cb5b5bd1..79f0f634 100644 --- a/Algorithms/RecommenderSystem/CollaborativeFiltering.cs +++ b/Algorithms/RecommenderSystem/CollaborativeFiltering.cs @@ -29,6 +29,7 @@ public double CalculateSimilarity(Dictionary user1Ratings, Dicti double numerator = 0; double sumSquare1 = 0; double sumSquare2 = 0; + double epsilon = 1e-10; for (var i = 0; i < commonItems.Count; i++) { @@ -41,7 +42,7 @@ public double CalculateSimilarity(Dictionary user1Ratings, Dicti } var denominator = Math.Sqrt(sumSquare1 * sumSquare2); - return denominator == 0 ? 0 : numerator / denominator; + return Math.Abs(denominator) < epsilon ? 0 : numerator / denominator; } /// @@ -62,6 +63,7 @@ public double PredictRating(string targetItem, string targetUser, Dictionary u != targetUser)) { @@ -74,7 +76,7 @@ public double PredictRating(string targetItem, string targetUser, Dictionary Date: Wed, 23 Oct 2024 09:01:23 +0700 Subject: [PATCH 3/5] add some test case --- Algorithms.Tests/Algorithms.Tests.csproj | 1 + .../CollaborativeFilteringTests.cs | 29 +++++++++++++++---- .../CollaborativeFiltering.cs | 9 +++++- .../ISimilarityCalculator.cs | 13 +++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 Algorithms/RecommenderSystem/ISimilarityCalculator.cs diff --git a/Algorithms.Tests/Algorithms.Tests.csproj b/Algorithms.Tests/Algorithms.Tests.csproj index 893564d8..d6d63755 100644 --- a/Algorithms.Tests/Algorithms.Tests.csproj +++ b/Algorithms.Tests/Algorithms.Tests.csproj @@ -20,6 +20,7 @@ + diff --git a/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs b/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs index b46dae92..208de6fb 100644 --- a/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs +++ b/Algorithms.Tests/RecommenderSystem/CollaborativeFilteringTests.cs @@ -1,4 +1,5 @@ using Algorithms.RecommenderSystem; +using Moq; using NUnit.Framework; using System.Collections.Generic; @@ -7,13 +8,15 @@ namespace Algorithms.Tests.RecommenderSystem [TestFixture] public class CollaborativeFilteringTests { - private CollaborativeFiltering recommender = new(); + private Mock? mockSimilarityCalculator; + private CollaborativeFiltering? recommender; private Dictionary> testRatings = null!; [SetUp] public void Setup() { - recommender = new CollaborativeFiltering(); + mockSimilarityCalculator = new Mock(); + recommender = new CollaborativeFiltering(mockSimilarityCalculator.Object); testRatings = new Dictionary> { @@ -49,7 +52,7 @@ public void CalculateSimilarity_WithValidInputs_ReturnsExpectedResults( var user1Ratings = new Dictionary { [commonItem] = rating1 }; var user2Ratings = new Dictionary { [commonItem] = rating2 }; - var similarity = recommender.CalculateSimilarity(user1Ratings, user2Ratings); + var similarity = recommender?.CalculateSimilarity(user1Ratings, user2Ratings); Assert.That(similarity, Is.InRange(-1.0, 1.0)); } @@ -60,7 +63,7 @@ public void CalculateSimilarity_WithNoCommonItems_ReturnsZero() var user1Ratings = new Dictionary { ["item1"] = 5.0 }; var user2Ratings = new Dictionary { ["item2"] = 4.0 }; - var similarity = recommender.CalculateSimilarity(user1Ratings, user2Ratings); + var similarity = recommender?.CalculateSimilarity(user1Ratings, user2Ratings); Assert.That(similarity, Is.EqualTo(0)); } @@ -68,9 +71,25 @@ public void CalculateSimilarity_WithNoCommonItems_ReturnsZero() [Test] public void PredictRating_WithNonexistentItem_ReturnsZero() { - var predictedRating = recommender.PredictRating("nonexistentItem", "user1", testRatings); + var predictedRating = recommender?.PredictRating("nonexistentItem", "user1", testRatings); Assert.That(predictedRating, Is.EqualTo(0)); } + + [Test] + public void PredictRating_WithOtherUserHavingRatedTargetItem_ShouldCalculateSimilarityAndWeightedSum() + { + var targetItem = "item1"; + var targetUser = "user1"; + + mockSimilarityCalculator? + .Setup(s => s.CalculateSimilarity(It.IsAny>(), It.IsAny>())) + .Returns(0.8); + + var predictedRating = recommender?.PredictRating(targetItem, targetUser, testRatings); + + Assert.That(predictedRating, Is.Not.EqualTo(0.0d)); + Assert.That(predictedRating, Is.EqualTo(3.5d).Within(0.01)); + } } } diff --git a/Algorithms/RecommenderSystem/CollaborativeFiltering.cs b/Algorithms/RecommenderSystem/CollaborativeFiltering.cs index 79f0f634..8da58b8a 100644 --- a/Algorithms/RecommenderSystem/CollaborativeFiltering.cs +++ b/Algorithms/RecommenderSystem/CollaborativeFiltering.cs @@ -6,6 +6,13 @@ namespace Algorithms.RecommenderSystem { public class CollaborativeFiltering { + private readonly ISimilarityCalculator similarityCalculator; + + public CollaborativeFiltering(ISimilarityCalculator similarityCalculator) + { + this.similarityCalculator = similarityCalculator; + } + /// /// Method to calculate similarity between two users using Pearson correlation. /// @@ -70,7 +77,7 @@ public double PredictRating(string targetItem, string targetUser, Dictionary user1Ratings, Dictionary user2Ratings); + } +} From 333a1971afb4b5a6d67b7b622b198cf0496d270e Mon Sep 17 00:00:00 2001 From: Duc Thanh Nguyen Date: Thu, 24 Oct 2024 08:57:37 +0700 Subject: [PATCH 4/5] Add the Geohash --- Algorithms.Tests/Other/GeohashTests.cs | 59 ++++++++++++++++++ Algorithms/Other/Geohash.cs | 84 ++++++++++++++++++++++++++ README.md | 1 + 3 files changed, 144 insertions(+) create mode 100644 Algorithms.Tests/Other/GeohashTests.cs create mode 100644 Algorithms/Other/Geohash.cs diff --git a/Algorithms.Tests/Other/GeohashTests.cs b/Algorithms.Tests/Other/GeohashTests.cs new file mode 100644 index 00000000..bf2cced4 --- /dev/null +++ b/Algorithms.Tests/Other/GeohashTests.cs @@ -0,0 +1,59 @@ +using Algorithms.Other; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Algorithms.Tests.Other +{ + [TestFixture] + public class GeohashTests + { + [Test] + public void Encode_ShouldReturnCorrectGeohash_ForHoChiMinhCity() + { + double latitude = 10.8231; + double longitude = 106.6297; + string result = Geohash.Encode(latitude, longitude); + Assert.That(result, Is.EqualTo("w3gvd6m3hh54")); + } + + [Test] + public void Encode_ShouldReturnCorrectGeohash_ForHanoi() + { + double latitude = 21.0285; + double longitude = 105.8542; + string result = Geohash.Encode(latitude, longitude); + Assert.That(result, Is.EqualTo("w7er8u0evss2")); + } + + [Test] + public void Encode_ShouldReturnCorrectGeohash_ForDaNang() + { + double latitude = 16.0544; + double longitude = 108.2022; + string result = Geohash.Encode(latitude, longitude); + Assert.That(result, Is.EqualTo("w6ugq4w7wj04")); + } + + [Test] + public void Encode_ShouldReturnCorrectGeohash_ForNhaTrang() + { + double latitude = 12.2388; + double longitude = 109.1967; + string result = Geohash.Encode(latitude, longitude); + Assert.That(result, Is.EqualTo("w6jtsu485t8v")); + } + + [Test] + public void Encode_ShouldReturnCorrectGeohash_ForVungTau() + { + double latitude = 10.3460; + double longitude = 107.0843; + string result = Geohash.Encode(latitude, longitude); + Assert.That(result, Is.EqualTo("w3u4ug2mv41m")); + } + } +} diff --git a/Algorithms/Other/Geohash.cs b/Algorithms/Other/Geohash.cs new file mode 100644 index 00000000..4ef8923c --- /dev/null +++ b/Algorithms/Other/Geohash.cs @@ -0,0 +1,84 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Algorithms.Other +{ + public class Geohash + { + private static readonly string Base32Characters = "0123456789bcdefghjkmnpqrstuvwxyz"; // Convert latitude and longitude coordinates into a concise string + private static readonly int GeohashLength = 12; // ± 1.86 cm + + /// + /// Encodes the provided latitude and longitude coordinates into a Geohash string. + /// Geohashing is a method to encode geographic coordinates (latitude, longitude). + /// into a short string of letters and digits. Each character in the resulting Geohash . + /// string adds more precision to the location. The longer the Geohash, the smaller the area. + /// + /// The latitude of the location to encode. It must be a value between -90 and 90. + /// The longitude of the location to encode. It must be a value between -180 and 180. + /// + /// A Geohash string of length 12 representing the location with high precision. + /// A longer Geohash provides higher precision in terms of geographic area. + /// and a 12-character Geohash can be accurate down to around 1.86 cm. + /// + public static string Encode(double latitude, double longitude) + { + double[] latitudeRange = new[] { -90.0, 90.0 }; + double[] longitudeRange = new[] { -180.0, 180.0 }; + bool isEncodingLongitude = true; + int currentBit = 0; + int base32Index = 0; + StringBuilder geohashResult = new StringBuilder(); + + while (geohashResult.Length < GeohashLength) + { + double midpoint; + + if (isEncodingLongitude) + { + midpoint = (longitudeRange[0] + longitudeRange[1]) / 2; + if (longitude > midpoint) + { + base32Index |= 1 << (4 - currentBit); + longitudeRange[0] = midpoint; + } + else + { + longitudeRange[1] = midpoint; + } + } + else + { + midpoint = (latitudeRange[0] + latitudeRange[1]) / 2; + if (latitude > midpoint) + { + base32Index |= 1 << (4 - currentBit); + latitudeRange[0] = midpoint; + } + else + { + latitudeRange[1] = midpoint; + } + } + + isEncodingLongitude = !isEncodingLongitude; + + if (currentBit < 4) + { + currentBit++; + } + else + { + geohashResult.Append(Base32Characters[base32Index]); + currentBit = 0; + base32Index = 0; + } + } + + return geohashResult.ToString(); + } + } +} diff --git a/README.md b/README.md index f67c4356..c392e0d0 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,7 @@ find more than one implementation for the same objective but using different alg * [Welford's Variance](./Algorithms/Other/WelfordsVariance.cs) * [Julian Easter](./Algorithms/Other/JulianEaster.cs) * [Pollard's Rho](./Algorithms/Other/PollardsRhoFactorizing.cs) + * [GeoLocation Hash](./Algorithms/Other/Geohash.cs) * [Problems](./Algorithms/Problems) * [Stable Marriage](./Algorithms/Problems/StableMarriage) * [Gale-Shapley](./Algorithms/Problems/StableMarriage/GaleShapley.cs) From a80e22fdc07b5a7ca354a683c9968dbbedec2fa6 Mon Sep 17 00:00:00 2001 From: Duc Thanh Nguyen Date: Thu, 24 Oct 2024 09:18:33 +0700 Subject: [PATCH 5/5] Add the Geohash and improve some code --- Algorithms/Other/Geohash.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Algorithms/Other/Geohash.cs b/Algorithms/Other/Geohash.cs index 4ef8923c..53507f85 100644 --- a/Algorithms/Other/Geohash.cs +++ b/Algorithms/Other/Geohash.cs @@ -6,10 +6,10 @@ namespace Algorithms.Other { - public class Geohash + public static class Geohash { - private static readonly string Base32Characters = "0123456789bcdefghjkmnpqrstuvwxyz"; // Convert latitude and longitude coordinates into a concise string - private static readonly int GeohashLength = 12; // ± 1.86 cm + private const string Base32Characters = "0123456789bcdefghjkmnpqrstuvwxyz"; // Convert latitude and longitude coordinates into a concise string + private const int GeohashLength = 12; // ± 1.86 cm /// /// Encodes the provided latitude and longitude coordinates into a Geohash string.