11
22using System . Text . Json ;
3+ using System . Globalization ;
4+ using System . Text ;
35using Microsoft . Extensions . Options ;
46using TNO . Core . Exceptions ;
57using TNO . Core . Extensions ;
@@ -206,6 +208,8 @@ public async Task<ReportResultModel> GenerateReportAsync(
206208 return new ReportSectionModel ( section , content , aggregations ) ;
207209 } ) ;
208210
211+ // Apply optional title-based de-duplication per section settings.
212+ ApplyTitleDeduplication ( sections ) ;
209213 var result = await GenerateReportAsync ( model , null , sections , viewOnWebOnly , isPreview ) ;
210214 result . Data = JsonDocument . Parse ( JsonSerializer . Serialize ( elasticResults ) ) ;
211215 return result ;
@@ -230,12 +234,86 @@ private async Task<ReportResultModel> GenerateReportAsync(
230234 bool viewOnWebOnly = false ,
231235 bool isPreview = false )
232236 {
237+ // Apply optional title-based de-duplication per section settings.
238+ ApplyTitleDeduplication ( sections ) ;
233239 var subject = await _reportEngine . GenerateReportSubjectAsync ( report , reportInstance , sections , viewOnWebOnly , isPreview ) ;
234240 var body = await _reportEngine . GenerateReportBodyAsync ( report , reportInstance , sections , GetLinkedReportContent , _storageOptions . GetUploadPath ( ) , viewOnWebOnly , isPreview ) ;
235241
236242 return new ReportResultModel ( ) { ReportId = report . Id , InstanceId = reportInstance ? . Id , Subject = subject , Body = body } ;
237243 }
238244
/// <summary>
/// Remove stories with duplicate headlines from every content section that has
/// Settings.RemoveDuplicateTitles enabled. Headlines are compared across the whole report
/// (all sections, regardless of their settings) after normalization by NormalizeTitle, and
/// within the filtered sections only the earliest story of each headline group is kept.
/// Earliest is determined by PublishedOn, falling back to PostedOn. A story with neither date
/// is treated as earlier than any dated story, and two undated stories are ordered by SortOrder.
/// On an exact tie the story encountered first is kept.
/// Stories without a usable headline (null, blank, or no letters/digits) are never removed.
/// </summary>
/// <param name="sections">Sections keyed by name. The Content of each matching section is replaced in place.</param>
private static void ApplyTitleDeduplication(Dictionary<string, ReportSectionModel> sections)
{
    if (sections == null || sections.Count == 0) return;

    // Pass 1: map each normalized headline to the earliest story carrying it, across all sections.
    var earliestByTitle = new Dictionary<string, ContentModel>(StringComparer.Ordinal);
    foreach (var section in sections.Values)
    {
        foreach (var item in section.Content)
        {
            if (item == null) continue;

            var key = NormalizeTitle(item.Headline ?? "");
            if (string.IsNullOrWhiteSpace(key)) continue;

            if (!earliestByTitle.TryGetValue(key, out var existing) || IsEarlierStory(item, existing))
            {
                earliestByTitle[key] = item;
            }
        }
    }

    // Pass 2: only content sections that opted in are filtered; everything else is left untouched.
    foreach (var section in sections.Values)
    {
        if (!section.Settings.RemoveDuplicateTitles || section.SectionType != TNO.Entities.ReportSectionType.Content) continue;

        section.Content = section.Content.Where(item =>
        {
            if (item == null) return true; // nothing to dedupe

            var key = NormalizeTitle(item.Headline ?? "");
            if (string.IsNullOrWhiteSpace(key)) return true; // nothing to dedupe

            // Keep the story only when it is the earliest one recorded for its headline.
            return !earliestByTitle.TryGetValue(key, out var earliest) || item.Id == earliest.Id;
        }).ToArray();
    }
}

/// <summary>
/// Decide whether <paramref name="candidate"/> should replace <paramref name="current"/> as the
/// earliest story of a headline group.
/// </summary>
/// <param name="candidate">The story being considered.</param>
/// <param name="current">The story currently recorded as earliest.</param>
/// <returns>True only when the candidate is strictly earlier; ties keep the current story.</returns>
private static bool IsEarlierStory(ContentModel candidate, ContentModel current)
{
    var candidateDate = candidate.PublishedOn ?? candidate.PostedOn;
    var currentDate = current.PublishedOn ?? current.PostedOn;

    if (candidateDate.HasValue && currentDate.HasValue) return candidateDate.Value < currentDate.Value;

    // Exactly one story is undated: the undated one is considered the earlier of the two.
    if (candidateDate.HasValue != currentDate.HasValue) return !candidateDate.HasValue;

    // Neither story has a date. SortOrder is compared directly rather than being converted into a
    // synthetic timestamp, because DateTime.MinValue.AddSeconds(n) throws for any negative n.
    return candidate.SortOrder < current.SortOrder;
}
295+
/// <summary>
/// Normalize a title for duplicate comparison:
/// - trim
/// - lowercase (invariant)
/// - remove diacritics
/// - remove all non-letter/digit characters (ignore punctuation/whitespace)
/// Characters are classified per Unicode code point, so letters and digits encoded as
/// surrogate pairs are kept rather than discarded.
/// </summary>
/// <param name="title">Raw headline. Null, empty or whitespace-only input yields an empty string.</param>
/// <returns>The comparison key; empty when the title contains no letters or digits.</returns>
private static string NormalizeTitle(string title)
{
    if (string.IsNullOrWhiteSpace(title)) return string.Empty;

    var lowered = title.Trim().ToLowerInvariant();

    string decomposed;
    try
    {
        // Canonical decomposition separates base letters from their combining marks.
        decomposed = lowered.Normalize(NormalizationForm.FormD);
    }
    catch (ArgumentException)
    {
        // Normalize rejects malformed UTF-16 (e.g. a lone surrogate). A bad headline must not
        // abort the caller, so fall back to the un-decomposed text; the stray code units are
        // dropped by the filter below because they are neither letters nor digits.
        decomposed = lowered;
    }

    var sb = new StringBuilder(decomposed.Length);
    for (var i = 0; i < decomposed.Length; i++)
    {
        // For the first half of a valid surrogate pair this overload reports the category of the
        // whole code point instead of UnicodeCategory.Surrogate.
        var isPair = char.IsSurrogatePair(decomposed, i);
        switch (CharUnicodeInfo.GetUnicodeCategory(decomposed, i))
        {
            case UnicodeCategory.UppercaseLetter:
            case UnicodeCategory.LowercaseLetter:
            case UnicodeCategory.TitlecaseLetter:
            case UnicodeCategory.ModifierLetter:
            case UnicodeCategory.OtherLetter:
            case UnicodeCategory.DecimalDigitNumber:
                sb.Append(decomposed, i, isPair ? 2 : 1);
                break;
            default:
                // Combining marks (diacritics), punctuation, symbols and whitespace are dropped.
                break;
        }

        if (isPair) i++; // skip the low surrogate that was handled together with the high one
    }
    return sb.ToString();
}
316+
239317 #region AV Overview
240318 /// <summary>
241319 /// Execute the report template to generate the subject and body.
0 commit comments