Skip to content

Commit ceabb6b

Browse files
committed
mmi-3349
1 parent e0bdc1f commit ceabb6b

File tree

5 files changed

+90
-3
lines changed

5 files changed

+90
-3
lines changed

api/net/Areas/Helpers/ReportHelper.cs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11

22
using System.Text.Json;
3+
using System.Globalization;
4+
using System.Text;
35
using Microsoft.Extensions.Options;
46
using TNO.Core.Exceptions;
57
using TNO.Core.Extensions;
@@ -206,6 +208,8 @@ public async Task<ReportResultModel> GenerateReportAsync(
206208
return new ReportSectionModel(section, content, aggregations);
207209
});
208210

211+
// Apply optional title-based de-duplication per section settings.
212+
ApplyTitleDeduplication(sections);
209213
var result = await GenerateReportAsync(model, null, sections, viewOnWebOnly, isPreview);
210214
result.Data = JsonDocument.Parse(JsonSerializer.Serialize(elasticResults));
211215
return result;
@@ -230,12 +234,86 @@ private async Task<ReportResultModel> GenerateReportAsync(
230234
bool viewOnWebOnly = false,
231235
bool isPreview = false)
232236
{
237+
// Apply optional title-based de-duplication per section settings.
238+
ApplyTitleDeduplication(sections);
233239
var subject = await _reportEngine.GenerateReportSubjectAsync(report, reportInstance, sections, viewOnWebOnly, isPreview);
234240
var body = await _reportEngine.GenerateReportBodyAsync(report, reportInstance, sections, GetLinkedReportContent, _storageOptions.GetUploadPath(), viewOnWebOnly, isPreview);
235241

236242
return new ReportResultModel() { ReportId = report.Id, InstanceId = reportInstance?.Id, Subject = subject, Body = body };
237243
}
238244

245+
/// <summary>
/// For every content section that has Settings.RemoveDuplicateTitles enabled, remove items whose
/// normalized headline matches another story in the report, keeping only the earliest story.
/// Candidates are gathered from all sections (regardless of their settings or type), but only
/// sections with the setting enabled and a section type of Content are filtered.
/// A story's rank is its PublishedOn; when that is not set PostedOn is used; when neither is set a
/// synthetic timestamp derived from SortOrder is used. When two stories rank equally, the one
/// encountered first is kept. Items without a usable headline are never removed.
/// </summary>
/// <param name="sections">Sections keyed by name. Null or empty input is a no-op.</param>
private static void ApplyTitleDeduplication(Dictionary<string, ReportSectionModel> sections)
{
    if (sections == null || sections.Count == 0) return;

    // Ranking timestamp for a story. The SortOrder fallback is only applied for positive values,
    // because DateTime.MinValue.AddSeconds(negative) throws ArgumentOutOfRangeException.
    static DateTime GetTimestamp(ContentModel content) =>
        content.PublishedOn
        ?? content.PostedOn
        ?? (content.SortOrder > 0 ? DateTime.MinValue.AddSeconds(content.SortOrder) : DateTime.MinValue);

    // Pass 1: map each normalized headline to the earliest story carrying it anywhere in the report.
    var earliestByTitle = new Dictionary<string, ContentModel>(StringComparer.Ordinal);
    foreach (var section in sections.Values)
    {
        foreach (var item in section.Content)
        {
            // NormalizeTitle already discards surrounding whitespace; a null item yields an empty key.
            var key = NormalizeTitle(item?.Headline ?? "");
            if (string.IsNullOrEmpty(key)) continue;

            // Strict comparison: on equal timestamps the first story encountered stays the winner.
            if (!earliestByTitle.TryGetValue(key, out var existing) || GetTimestamp(item) < GetTimestamp(existing))
            {
                earliestByTitle[key] = item;
            }
        }
    }

    // Pass 2: filter only the content sections that opted in.
    foreach (var section in sections.Values)
    {
        if (!section.Settings.RemoveDuplicateTitles || section.SectionType != TNO.Entities.ReportSectionType.Content) continue;

        section.Content = section.Content.Where(item =>
        {
            var key = NormalizeTitle(item?.Headline ?? "");
            if (string.IsNullOrEmpty(key)) return true; // no usable headline, nothing to dedupe
            if (!earliestByTitle.TryGetValue(key, out var earliest)) return true; // headline was never indexed
            return item.Id == earliest.Id; // keep the earliest story (matched by Id); drop the others
        }).ToArray();
    }
}
295+
296+
/// <summary>
/// Normalize a title for duplicate comparison:
/// - lowercase (invariant)
/// - decompose to Unicode normalization form D and remove diacritics
/// - remove all non-letter/digit characters (punctuation and all whitespace, including
///   leading and trailing whitespace)
/// The text is processed as Unicode scalar values rather than UTF-16 code units, so letters and
/// digits encoded as surrogate pairs are retained instead of being silently discarded.
/// </summary>
/// <param name="title">The headline to normalize; null or blank input is accepted.</param>
/// <returns>The comparison key, or an empty string when no letters or digits remain.</returns>
private static string NormalizeTitle(string title)
{
    if (string.IsNullOrWhiteSpace(title)) return string.Empty;

    var decomposed = title.ToLowerInvariant().Normalize(NormalizationForm.FormD);
    var sb = new StringBuilder(decomposed.Length);
    foreach (var rune in decomposed.EnumerateRunes())
    {
        // Non-spacing marks (the diacritics split off by form D) are neither letters nor digits,
        // so this single test strips them together with punctuation and whitespace.
        if (Rune.IsLetterOrDigit(rune)) sb.Append(rune.ToString());
    }
    return sb.ToString();
}
316+
239317
#region AV Overview
240318
/// <summary>
241319
/// Execute the report template to generate the subject and body.

api/net/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
ARG BUILD_CONFIGURATION=Release
22
FROM mcr.microsoft.com/dotnet/aspnet:9.0 AS base
33

4-
# RUN apt-get update --fix-missing && apt-get -y upgrade
5-
# RUN apt -y install curl libc6-dev libgdiplus ffmpeg
6-
# RUN apt-get clean
4+
RUN apt-get update --fix-missing && apt-get -y upgrade
5+
RUN apt -y install curl libc6-dev libgdiplus ffmpeg
6+
RUN apt-get clean
77

88
EXPOSE 443 8080
99

app/editor/src/features/admin/reports/components/ReportSectionContent.tsx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ export const ReportSectionContent = ({ index }: IReportSectionContentProps) => {
6868
label="Remove Duplicate Content"
6969
tooltip="Remove content from this section that is in above sections"
7070
/>
71+
<FormikCheckbox
72+
name={`sections.${index}.settings.removeDuplicateTitles`}
73+
label="Remove duplicate stories with same title"
74+
tooltip="Ignore case, trim and punctuation; keep earliest by PublishedOn/PostedOn"
75+
/>
7176
<Show visible={!!section.folderId || !!section.linkedReportId}>
7277
<FormikCheckbox
7378
name={`sections.${index}.settings.overrideExcludeHistorical`}

libs/net/models/Settings/ReportSectionSettingsModel.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ public class ReportSectionSettingsModel
1616
public bool? CacheData { get; set; }
1717
public string Direction { get; set; } = "";
1818
public bool RemoveDuplicates { get; set; }
19+
public bool RemoveDuplicateTitles { get; set; }
1920
public bool OverrideExcludeHistorical { get; set; }
2021
public bool? InTableOfContents { get; set; }
2122
public bool HideEmpty { get; set; }
@@ -44,6 +45,7 @@ public ReportSectionSettingsModel(Dictionary<string, object> settings, JsonSeria
4445
this.CacheData = settings.GetDictionaryJsonValue("cacheData", false, options);
4546
this.Direction = settings.GetDictionaryJsonValue("direction", "", options)!;
4647
this.RemoveDuplicates = settings.GetDictionaryJsonValue("removeDuplicates", false, options)!;
48+
this.RemoveDuplicateTitles = settings.GetDictionaryJsonValue("removeDuplicateTitles", false, options)!;
4749
this.OverrideExcludeHistorical = settings.GetDictionaryJsonValue("overrideExcludeHistorical", false, options)!;
4850
this.InTableOfContents = settings.GetDictionaryJsonValue<bool?>("inTableOfContents", null, options)!;
4951
this.HideEmpty = settings.GetDictionaryJsonValue("hideEmpty", false, options)!;
@@ -69,6 +71,7 @@ public ReportSectionSettingsModel(JsonDocument settings, JsonSerializerOptions o
6971
this.CacheData = settings.GetElementValue("cacheData", false, options);
7072
this.Direction = settings.GetElementValue("direction", "", options)!;
7173
this.RemoveDuplicates = settings.GetElementValue("removeDuplicates", false, options)!;
74+
this.RemoveDuplicateTitles = settings.GetElementValue("removeDuplicateTitles", false, options)!;
7275
this.OverrideExcludeHistorical = settings.GetElementValue("overrideExcludeHistorical", false, options)!;
7376
this.InTableOfContents = settings.GetElementValue<bool?>("inTableOfContents", null, options)!;
7477
this.HideEmpty = settings.GetElementValue("hideEmpty", false, options)!;

libs/npm/core/src/hooks/api/interfaces/IReportSectionSettingsModel.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export interface IReportSectionSettingsModel {
88
cacheData?: boolean;
99
direction: 'row' | 'column';
1010
removeDuplicates: boolean;
11+
removeDuplicateTitles?: boolean;
1112
overrideExcludeHistorical: boolean;
1213
inTableOfContents?: boolean;
1314
hideEmpty: boolean;

0 commit comments

Comments
 (0)