diff --git a/app/editor/.yarn/cache/tno-core-npm-1.0.27-3063e2245b-0a37ba9541.zip b/app/editor/.yarn/cache/tno-core-npm-1.0.28-a07dd79a18-69ecabc771.zip similarity index 85% rename from app/editor/.yarn/cache/tno-core-npm-1.0.27-3063e2245b-0a37ba9541.zip rename to app/editor/.yarn/cache/tno-core-npm-1.0.28-a07dd79a18-69ecabc771.zip index e2adccfebc..bb556cdb29 100644 Binary files a/app/editor/.yarn/cache/tno-core-npm-1.0.27-3063e2245b-0a37ba9541.zip and b/app/editor/.yarn/cache/tno-core-npm-1.0.28-a07dd79a18-69ecabc771.zip differ diff --git a/app/editor/package.json b/app/editor/package.json index 43b808e04c..70631c2c55 100644 --- a/app/editor/package.json +++ b/app/editor/package.json @@ -60,7 +60,7 @@ "redux-logger": "3.0.6", "styled-components": "6.1.11", "stylis": "4.3.2", - "tno-core": "1.0.27" + "tno-core": "1.0.28" }, "devDependencies": { "@simbathesailor/use-what-changed": "2.0.0", diff --git a/app/editor/src/features/admin/reports/components/ReportContentOptions.tsx b/app/editor/src/features/admin/reports/components/ReportContentOptions.tsx index a93ba42ec2..565fc6cd1b 100644 --- a/app/editor/src/features/admin/reports/components/ReportContentOptions.tsx +++ b/app/editor/src/features/admin/reports/components/ReportContentOptions.tsx @@ -44,6 +44,11 @@ export const ReportContentOptions = () => { name="settings.content.excludeHistorical" tooltip="Exclude content already reported on in prior instances of this report" /> + { label="Remove Duplicate Content" tooltip="Remove content from this section that is in above sections" /> + { label="Remove Duplicate Content" tooltip="Remove content from this section that is in above sections" /> + + { + const sanitizedValues = sanitizeReport(values); try { setSubmitting(true); - const result = await updateReport(values); - const form = await onGenerate(toForm({ ...result, instances: values.instances }), true); + const result = await updateReport(sanitizedValues); + const form = await onGenerate( + toForm({ ...result, instances: sanitizedValues.instances }), + true, + ); if (form) updateForm(form); } catch { } finally { diff --git a/app/subscriber/src/features/my-reports/edit/ReportEditPage.tsx b/app/subscriber/src/features/my-reports/edit/ReportEditPage.tsx index 4dcf8393c4..163a6412c6 100644 --- a/app/subscriber/src/features/my-reports/edit/ReportEditPage.tsx +++ b/app/subscriber/src/features/my-reports/edit/ReportEditPage.tsx @@ -16,7 +16,7 @@ import { Col, IReportMessageModel, MessageTargetKey, ReportStatusName, Show } fr import { defaultReport } from '../constants'; import { IReportForm } from '../interfaces'; -import { sortContent, toForm } from '../utils'; +import { sanitizeReport, sortContent, toForm } from '../utils'; import { ContentEditForm } from './content'; import { ReportEditContextProvider } from './ReportEditContext'; import { ReportEditForm } from './ReportEditForm'; @@ -210,27 +210,28 @@ export const ReportEditPage = () => { const handleSubmit = React.useCallback( async (values: IReportForm) => { + const sanitizedValues = sanitizeReport(values); try { - const originalId = values.id; - const originalReportSections = values.sections; + const originalId = sanitizedValues.id; + const originalReportSections = sanitizedValues.sections; const sameNameReport = myReports.some( (r) => - r.name.trim().toLocaleLowerCase() === values.name.trim().toLocaleLowerCase() && - r.id !== values.id, + r.name.trim().toLocaleLowerCase() === sanitizedValues.name.trim().toLocaleLowerCase() && + r.id !== sanitizedValues.id, ); if (sameNameReport) { - toast.error(`A report with the name '${values.name}' already exists.`); + toast.error(`A report with the name '${sanitizedValues.name}' already exists.`); } else { - if (values.instances.length) { + if (sanitizedValues.instances.length) { // Apply new sort order values for content to stop content from moving around when it has the same sort order value. - values.instances[0] = { - ...values.instances[0], - content: sortContent(values.instances[0].content, true), + sanitizedValues.instances[0] = { + ...sanitizedValues.instances[0], + content: sortContent(sanitizedValues.instances[0].content, true), }; } const report = originalId ? await updateReport( - values, + sanitizedValues, instance && [ ReportStatusName.Pending, @@ -240,15 +241,15 @@ export const ReportEditPage = () => { ].includes(instance.status), ) : await addReport({ - ...values, - ownerId: values.ownerId ?? userInfo?.id ?? 0, + ...sanitizedValues, + ownerId: sanitizedValues.ownerId ?? userInfo?.id ?? 0, settings: { - ...values.settings, + ...sanitizedValues.settings, subject: { - ...values.settings.subject, - text: values.settings.subject.text.length // Default email subject line - ? values.settings.subject.text - : values.name, + ...sanitizedValues.settings.subject, + text: sanitizedValues.settings.subject.text.length // Default email subject line + ? sanitizedValues.settings.subject.text + : sanitizedValues.name, }, }, }); diff --git a/app/subscriber/src/features/my-reports/edit/content/ContentEditForm.tsx b/app/subscriber/src/features/my-reports/edit/content/ContentEditForm.tsx index b4e53b5929..88bcffd74b 100644 --- a/app/subscriber/src/features/my-reports/edit/content/ContentEditForm.tsx +++ b/app/subscriber/src/features/my-reports/edit/content/ContentEditForm.tsx @@ -3,7 +3,7 @@ import { Bar } from 'components/bar'; import { Sentiment } from 'components/sentiment'; import { IReportForm, IReportInstanceContentForm } from 'features/my-reports/interfaces'; import { IContentValidationErrors } from 'features/my-reports/interfaces/IContentValidationErrors'; -import { toForm } from 'features/my-reports/utils'; +import { sanitizeReport, toForm } from 'features/my-reports/utils'; import { formatDate } from 'features/utils'; import React from 'react'; import { useApp, useContent, useReports } from 'store/hooks'; @@ -186,7 +186,7 @@ export const ContentEditForm = React.forwardRef ({ ...instance, @@ -197,7 +197,9 @@ export const ContentEditForm = React.forwardRef { name="settings.content.excludeHistorical" label="Exclude stories that have been sent out in previous report" /> + + + + Keep only the most recent story when duplicate titles are published within the past + three days. + + + + + + Keep only the most recent story when duplicate titles occur within the last three + days. + + + + + + Keep only the most recent story when duplicate titles occur within the last three + days. + + + + + + Keep only the most recent story when duplicate titles occur within the last three + days. + + + + + + Keep only the most recent story when duplicate titles occur within the last three + days. + + + + + + This overrides the report option "Exclude stories that have been sent out in previous + report" for this section only. + + + + + + Keep only the most recent story when duplicate titles occur within the last three + days. + + + + + + Keep only the most recent story when duplicate titles occur within the last + three days. + + showImage: false, direction: 'row', removeDuplicates: false, + removeDuplicateTitles3Days: false, overrideExcludeHistorical: false, hideEmpty: false, groupBy: '', diff --git a/app/subscriber/src/features/my-reports/utils/index.ts b/app/subscriber/src/features/my-reports/utils/index.ts index f36177251a..3bf9c4e6dd 100644 --- a/app/subscriber/src/features/my-reports/utils/index.ts +++ b/app/subscriber/src/features/my-reports/utils/index.ts @@ -12,6 +12,7 @@ export * from './hideEmptySections'; export * from './isAutoSend'; export * from './isAutoSendDisabled'; export * from './moveContent'; +export * from './sanitizeReport'; export * from './setAutoSend'; export * from './sortContent'; export * from './sortReportContent'; diff --git a/app/subscriber/src/features/my-reports/utils/sanitizeReport.ts b/app/subscriber/src/features/my-reports/utils/sanitizeReport.ts new file mode 100644 index 0000000000..019fb4c276 --- /dev/null +++ b/app/subscriber/src/features/my-reports/utils/sanitizeReport.ts @@ -0,0 +1,143 @@ +import { IReportInstanceContentModel } from 'tno-core'; + +import { IReportForm } from '../interfaces'; + +const DUPLICATE_WINDOW_MS = 3 * 24 * 60 * 60 * 1000; + +const normalizeHeadline = (headline?: string | null) => headline?.trim().toUpperCase() ?? ''; + +const parseDate = (value?: string | Date | null) => { + if (!value) return undefined; + const date = new Date(value); + return Number.isNaN(date.getTime()) ? undefined : date; +}; + +type ReportContentItem = IReportInstanceContentModel & { + originalIndex?: number; + selected?: boolean; +}; + +const getComparableDate = (content?: ReportContentItem['content']) => + parseDate(content?.publishedOn) ?? + parseDate(content?.postedOn) ?? + parseDate(content?.updatedOn) ?? + parseDate(content?.createdOn) ?? + new Date(0); + +const getUpdatedDate = (content?: ReportContentItem['content']) => + parseDate(content?.updatedOn) ?? new Date(0); + +const getCreatedDate = (content?: ReportContentItem['content']) => + parseDate(content?.createdOn) ?? new Date(0); + +const dedupeContent = ( + items: ReportContentItem[], + predicate: (item: ReportContentItem) => boolean, + threshold: Date, +) => { + if (!items.length) return items; + + const indicesToDrop = new Set(); + const groups = new Map(); + + items.forEach((item, index) => { + if (!predicate(item)) return; + const title = normalizeHeadline(item.content?.headline); + if (!title) return; + const date = getComparableDate(item.content); + if (date < threshold) return; + const existing = groups.get(title); + if (existing) existing.push(index); + else groups.set(title, [index]); + }); + + const compareItems = (a: ReportContentItem, b: ReportContentItem) => { + const dateA = getComparableDate(a.content); + const dateB = getComparableDate(b.content); + if (dateA.getTime() !== dateB.getTime()) return dateB.getTime() - dateA.getTime(); + + const updatedA = getUpdatedDate(a.content); + const updatedB = getUpdatedDate(b.content); + if (updatedA.getTime() !== updatedB.getTime()) return updatedB.getTime() - updatedA.getTime(); + + const createdA = getCreatedDate(a.content); + const createdB = getCreatedDate(b.content); + if (createdA.getTime() !== createdB.getTime()) return createdB.getTime() - createdA.getTime(); + + if ((a.contentId ?? 0) !== (b.contentId ?? 0)) return (b.contentId ?? 0) - (a.contentId ?? 0); + + return (b.sortOrder ?? 0) - (a.sortOrder ?? 0); + }; + + groups.forEach((indices) => { + if (indices.length <= 1) return; + const ordered = indices.slice().sort((a, b) => compareItems(items[a], items[b])); + const [, ...others] = ordered; + others.forEach((idx) => indicesToDrop.add(idx)); + }); + + if (!indicesToDrop.size) return items; + + return items.filter((_, index) => !indicesToDrop.has(index)); +}; + +const reassignSortOrders = (items: T[]): T[] => { + const orderCounters = new Map(); + return items.map((item) => { + const current = orderCounters.get(item.sectionName) ?? 0; + orderCounters.set(item.sectionName, current + 1); + return { ...item, sortOrder: current }; + }); +}; + +/** + * Ensures report-level and section-level settings include explicit boolean values for + * removeDuplicateTitles3Days and applies dedupe logic before persisting. + */ +export const sanitizeReport = (report: IReportForm): IReportForm => { + const sanitizedSections = report.sections.map((section) => ({ + ...section, + settings: { + ...section.settings, + removeDuplicateTitles3Days: !!section.settings.removeDuplicateTitles3Days, + }, + })); + + const sanitizedSettings = { + ...report.settings, + content: { + ...report.settings.content, + removeDuplicateTitles3Days: !!report.settings.content.removeDuplicateTitles3Days, + }, + }; + + const threshold = new Date(Date.now() - DUPLICATE_WINDOW_MS); + + const sanitizedInstances = report.instances.map((instance) => { + const originalContent = instance.content ?? []; + let content: ReportContentItem[] = [...originalContent]; + + sanitizedSections + .filter((section) => section.settings.removeDuplicateTitles3Days) + .forEach((section) => { + content = dedupeContent(content, (item) => item.sectionName === section.name, threshold); + }); + + if (sanitizedSettings.content.removeDuplicateTitles3Days) + content = dedupeContent(content, () => true, threshold); + + content = reassignSortOrders(content); + + return { + ...instance, + content, + }; + }); + + return { + ...report, + settings: sanitizedSettings, + sections: sanitizedSections, + instances: sanitizedInstances, + }; +}; diff --git a/app/subscriber/src/features/my-reports/utils/toForm.ts b/app/subscriber/src/features/my-reports/utils/toForm.ts index 873a4da774..e80d72389c 100644 --- a/app/subscriber/src/features/my-reports/utils/toForm.ts +++ b/app/subscriber/src/features/my-reports/utils/toForm.ts @@ -3,6 +3,7 @@ import { IReportModel } from 'tno-core'; import { defaultReportSchedule } from '../constants'; import { IReportForm } from '../interfaces'; import { getHideEmpty } from './getHideEmpty'; +import { sanitizeReport } from './sanitizeReport'; import { sortContent } from './sortContent'; /** @@ -12,12 +13,23 @@ import { sortContent } from './sortContent'; * @returns a new form. */ export const toForm = (report: IReportModel, updateSortOrder: boolean = false): IReportForm => { - return { + const form: IReportForm = { ...report, hideEmptySections: getHideEmpty(report.sections), sections: report.sections.map((s) => ({ ...s, + settings: { + ...s.settings, + removeDuplicateTitles3Days: !!s.settings.removeDuplicateTitles3Days, + }, })), + settings: { + ...report.settings, + content: { + ...report.settings.content, + removeDuplicateTitles3Days: !!report.settings.content?.removeDuplicateTitles3Days, + }, + }, events: report.events.length === 2 ? report.events @@ -27,7 +39,17 @@ export const toForm = (report: IReportModel, updateSortOrder: boolean = false): ], instances: report.instances.map((i) => ({ ...i, - content: sortContent(i.content, updateSortOrder), + content: [...i.content], + })), + }; + + const sanitized = sanitizeReport(form); + + return { + ...sanitized, + instances: sanitized.instances.map((instance) => ({ + ...instance, + content: sortContent(instance.content, updateSortOrder), })), }; }; diff --git a/app/subscriber/yarn.lock b/app/subscriber/yarn.lock index bd9a4654f3..813822aaea 100644 --- a/app/subscriber/yarn.lock +++ b/app/subscriber/yarn.lock @@ -6089,17 +6089,10 @@ __metadata: languageName: node linkType: hard -"caniuse-lite@npm:^1.0.0, caniuse-lite@npm:^1.0.30001587, caniuse-lite@npm:^1.0.30001599": - version: 1.0.30001621 - resolution: "caniuse-lite@npm:1.0.30001621" - checksum: 0afb65bbf558faea769c16e831fbbd5600c684c0f6bb4ffbc0d38528671fb5cb5d88714804241a88c61872ce289f7c6333aef6cfdfb09277bda0dbdf0aab3459 - languageName: node - linkType: hard - -"caniuse-lite@npm:^1.0.30001688": - version: 1.0.30001702 - resolution: "caniuse-lite@npm:1.0.30001702" - checksum: ba8e88f0ef09a16f36de805c9491c3047986ab6bb1e0dc66f03067dce5e197be1c98cfaed21867bad851985f775b8d4fa50e7e37537c116a5fe1ae623dfd400c +"caniuse-lite@npm:^1.0.0, caniuse-lite@npm:^1.0.30001587, caniuse-lite@npm:^1.0.30001599, caniuse-lite@npm:^1.0.30001688": + version: 1.0.30001751 + resolution: "caniuse-lite@npm:1.0.30001751" + checksum: d11e25c44e40c21e7b7492a25c9fd60f4c04e94aa265573f7c487666f5e1b5ca3ed09d09560336f959237063616255cb294d415511bb6cf0486eb2cb6a3a4318 languageName: node linkType: hard @@ -12049,7 +12042,7 @@ __metadata: sheetjs: "file:packages/xlsx-0.20.1.tgz" styled-components: 6.1.11 stylis: 4.3.2 - tno-core: 1.0.27 + tno-core: 1.0.28 typescript: 4.9.5 vitest: 3.0.7 languageName: unknown @@ -16404,9 +16397,9 @@ __metadata: languageName: node linkType: hard -"tno-core@npm:1.0.27": - version: 1.0.27 - resolution: "tno-core@npm:1.0.27" +"tno-core@npm:1.0.28": + version: 1.0.28 + resolution: "tno-core@npm:1.0.28" dependencies: "@elastic/elasticsearch": ^8.13.1 "@fortawesome/free-solid-svg-icons": ^6.4.2 @@ -16439,7 +16432,7 @@ __metadata: styled-components: ^6.1.11 stylis: ^4.3.2 yup: ^1.1.1 - checksum: 0a37ba95419455d675d723d1373aab1c17d534d85c9ead4192653bb722b8da1d2c386444b4bdc9c4a806f0df8fc54f2e944ee24f7ae32d3ff9b28445b841384b + checksum: 69ecabc7713077cf82d686e05335a373a83f609d4eac6298f197ed6801ce832d5624801e2fb5eccafef971e7c0c88509a3178aae95a8b2f131086ced71e5b6c9 languageName: node linkType: hard diff --git a/libs/net/dal/Services/ReportService.cs b/libs/net/dal/Services/ReportService.cs index 3d6cb4c844..a47bbb99b2 100644 --- a/libs/net/dal/Services/ReportService.cs +++ b/libs/net/dal/Services/ReportService.cs @@ -1,3 +1,5 @@ +using System; +using System.Collections.Generic; using System.Security.Claims; using System.Text.Json; using Microsoft.EntityFrameworkCore; @@ -21,6 +23,7 @@ public class ReportService : BaseService, IReportService private readonly ITNOElasticClient _elasticClient; private readonly IReportInstanceService _reportInstanceService; private readonly JsonSerializerOptions _serializerOptions; + private static readonly TimeSpan DuplicateTitleWindow = TimeSpan.FromDays(3); #endregion #region Constructors @@ -524,6 +527,14 @@ public async Task GenerateReportInstanceAsync( }); } + if (reportSettings.Content.RemoveDuplicateTitles3Days) + { + var titleHistory = BuildTitleHistory( + GetPreviousReportInstances(report.Id, instanceId, requestorId ?? report.OwnerId, includeContent: true, qty: 10), + DuplicateTitleWindow); + instanceContent = FilterDuplicateTitlesWithinWindow(instanceContent, DuplicateTitleWindow, titleHistory); + } + return new ReportInstance( instanceId ?? 0, id, @@ -961,17 +972,39 @@ public ReportInstance[] GetPreviousReportInstances(int id, long? instanceId, int var reportSettings = JsonSerializer.Deserialize(report.Settings.ToJson(), _serializerOptions) ?? new(); var ownerId = requestorId ?? report.OwnerId; // TODO: Handle users generating instances for a report they do not own. + var sectionSettingsLookup = report.Sections + .ToDictionary( + s => s.Name, + s => JsonSerializer.Deserialize(s.Settings.ToJson(), _serializerOptions) ?? new ReportSectionSettingsModel()); + var requiresDuplicateHistory = reportSettings.Content.RemoveDuplicateTitles3Days || sectionSettingsLookup.Values.Any(s => s.RemoveDuplicateTitles3Days); var currentInstance = instanceId.HasValue ? this.Context.ReportInstances .AsNoTracking() .Include(ri => ri.ContentManyToMany) + .ThenInclude(c => c.Content) .Where(ri => ri.OwnerId == ownerId) .FirstOrDefault(ri => ri.Id == instanceId) : - GetCurrentReportInstance(report.Id, ownerId); - var previousInstances = GetPreviousReportInstances(report.Id, instanceId ?? currentInstance?.Id, ownerId); + GetCurrentReportInstance(report.Id, ownerId, includeContent: true); + var previousInstances = GetPreviousReportInstances( + report.Id, + instanceId ?? currentInstance?.Id, + ownerId, + includeContent: true, + qty: requiresDuplicateHistory ? 10 : 2); var instances = currentInstance?.SentOn.HasValue == true ? [currentInstance, .. previousInstances] : previousInstances; var previousInstance = instances.FirstOrDefault(); + var historyInstances = BuildHistoryInstances( + instances, + requiresDuplicateHistory, + report, + instanceId ?? currentInstance?.Id, + ownerId); + + ISet? titleHistory = requiresDuplicateHistory + ? BuildTitleHistory(historyInstances, DuplicateTitleWindow, currentInstance) + : null; + // Create an array of content from the previous instance to exclude. var excludeHistoricalContentIds = reportSettings.Content.ExcludeHistorical ? [.. instances.SelectMany(pi => pi.ContentManyToMany.Select((c) => c.ContentId)).Distinct()] @@ -991,7 +1024,7 @@ public ReportInstance[] GetPreviousReportInstances(int id, long? instanceId, int foreach (var section in report.Sections.OrderBy(s => s.SortOrder)) { - var sectionSettings = JsonSerializer.Deserialize(section.Settings.ToJson(), _serializerOptions) ?? new(); + var sectionSettings = sectionSettingsLookup[section.Name]; // Content in a folder is added first. if (section.FolderId.HasValue) @@ -1029,6 +1062,16 @@ public ReportInstance[] GetPreviousReportInstances(int id, long? instanceId, int .OrderBy(fc => fc.SortOrder) .ToArray(); + var removeDuplicateTitles = sectionSettings.RemoveDuplicateTitles3Days || reportSettings.Content.RemoveDuplicateTitles3Days; + if (removeDuplicateTitles) + { + content = FilterDuplicateTitlesWithinWindow(content, DuplicateTitleWindow, titleHistory); + } + else if (titleHistory != null) + { + AddTitlesToHistory(content, DuplicateTitleWindow, titleHistory); + } + var folderContent = new Elastic.Models.SearchResultModel(); folderContent.Hits.Hits = content .Select(c => new Elastic.Models.HitModel() @@ -1070,7 +1113,23 @@ public ReportInstance[] GetPreviousReportInstances(int id, long? instanceId, int // Add retry logic for Elasticsearch query to handle failures var content = await RetryElasticsearchQueryAsync(async () => await _elasticClient.SearchAsync(defaultIndex, query)); - var contentHits = content.Hits.Hits.ToArray(); + var removeDuplicateTitles = sectionSettings.RemoveDuplicateTitles3Days || reportSettings.Content.RemoveDuplicateTitles3Days; + var hits = content.Hits.Hits ?? Array.Empty>(); + if (content.Hits.Hits == null) + content.Hits.Hits = hits; + + if (removeDuplicateTitles) + { + hits = FilterDuplicateTitlesWithinWindow( + hits, + DuplicateTitleWindow, + titleHistory); + content.Hits.Hits = hits; + } + else if (titleHistory != null) + { + AddTitlesToHistory(hits, DuplicateTitleWindow, titleHistory); + } // Fetch custom content versions for the requestor. var contentIds = content.Hits.Hits.Select(h => h.Source.Id).Distinct().ToArray(); @@ -1165,7 +1224,16 @@ private async Task RetryElasticsearchQueryAsync(Func> operation) var reportSettings = JsonSerializer.Deserialize(report.Settings.ToJson(), _serializerOptions) ?? new(); var ownerId = requestorId ?? reportInstance.OwnerId; // TODO: Handle users generating instances for a report they do not own. - var previousInstances = reportInstance != null ? GetPreviousReportInstances(report.Id, reportInstance.Id, ownerId) : null; + var sectionSettings = JsonSerializer.Deserialize(section.Settings.ToJson(), _serializerOptions) ?? new(); + var removeDuplicateTitles = sectionSettings.RemoveDuplicateTitles3Days || reportSettings.Content.RemoveDuplicateTitles3Days; + var previousInstances = reportInstance != null + ? GetPreviousReportInstances( + report.Id, + reportInstance.Id, + ownerId, + includeContent: true, + qty: removeDuplicateTitles ? 10 : 2) + : null; var previousInstance = previousInstances?.FirstOrDefault(); // Organize the content sections, and remove the specified section. @@ -1205,7 +1273,19 @@ private async Task RetryElasticsearchQueryAsync(Func> operation) if (reportInstance != null) excludeAboveSectionContentIds.AddRange(contentAbove.Select(c => c.ContentId).ToArray()); - var sectionSettings = JsonSerializer.Deserialize(section.Settings.ToJson(), _serializerOptions) ?? new(); + var historyInstances = BuildHistoryInstances( + previousInstances ?? Array.Empty(), + removeDuplicateTitles, + report, + reportInstance?.Id, + ownerId); + + ISet? titleHistory = removeDuplicateTitles + ? BuildTitleHistory( + historyInstances, + DuplicateTitleWindow, + reportInstance) + : null; if (section.FolderId.HasValue) { @@ -1236,6 +1316,11 @@ private async Task RetryElasticsearchQueryAsync(Func> operation) .OrderBy(fc => fc.SortOrder) .ToArray(); + if (removeDuplicateTitles) + { + content = FilterDuplicateTitlesWithinWindow(content, DuplicateTitleWindow, titleHistory); + } + var folderContent = new Elastic.Models.SearchResultModel(); folderContent.Hits.Hits = content .Select(c => new Elastic.Models.HitModel() @@ -1272,6 +1357,13 @@ private async Task RetryElasticsearchQueryAsync(Func> operation) var defaultIndex = filterSettings.SearchUnpublished ? _elasticOptions.ContentIndex : _elasticOptions.PublishedIndex; var content = await RetryElasticsearchQueryAsync(async () => await _elasticClient.SearchAsync(defaultIndex, query)); + if (removeDuplicateTitles) + { + content.Hits.Hits = FilterDuplicateTitlesWithinWindow( + content.Hits.Hits ?? Array.Empty>(), + DuplicateTitleWindow, + titleHistory); + } // Fetch custom content versions for the requestor. var contentIds = content.Hits.Hits.Select(h => h.Source.Id).Distinct().ToArray(); @@ -1287,6 +1379,327 @@ private async Task RetryElasticsearchQueryAsync(Func> operation) return searchResults; } + private ISet BuildTitleHistory(IEnumerable instances, TimeSpan window, ReportInstance? currentInstance = null) + { + var history = new HashSet(StringComparer.OrdinalIgnoreCase); + var threshold = DateTime.UtcNow - window; + + void AddTitles(IEnumerable contents, bool ignoreThreshold) + { + foreach (var item in contents) + { + var title = NormalizeHeadline(item.Content?.Headline); + if (string.IsNullOrWhiteSpace(title)) continue; + + var date = GetComparableDate(item.Content); + if (!ignoreThreshold && date < threshold) continue; + + history.Add(title); + } + } + + if (currentInstance != null) + { + var includeAll = currentInstance.SentOn.HasValue == false; + if (includeAll || (currentInstance.SentOn ?? currentInstance.PublishedOn ?? DateTime.MinValue) >= threshold) + AddTitles(currentInstance.ContentManyToMany, includeAll); + } + + foreach (var instance in instances) + { + var includeAll = instance.SentOn.HasValue == false; + if (!includeAll && (instance.SentOn ?? instance.PublishedOn ?? DateTime.MinValue) < threshold) + continue; + + AddTitles(instance.ContentManyToMany, includeAll); + } + + return history; + } + + private IEnumerable BuildHistoryInstances( + IEnumerable baseInstances, + bool dedupEnabled, + Report report, + long? instanceId, + int? requestorId) + { + if (!dedupEnabled || !report.OwnerId.HasValue) return baseInstances; + if (requestorId.HasValue && requestorId.Value == report.OwnerId.Value) return baseInstances; + + var ownerInstances = GetPreviousReportInstances( + report.Id, + instanceId, + report.OwnerId, + includeContent: true, + qty: 10); + + if (ownerInstances.Length == 0) return baseInstances; + + var existingInstanceIds = new HashSet(baseInstances.Select(i => i.Id)); + return [.. baseInstances, .. ownerInstances.Where(i => !existingInstanceIds.Contains(i.Id))]; + } + + private void AddTitlesToHistory(IEnumerable items, TimeSpan window, ISet? titleHistory) + { + if (titleHistory == null) return; + + var threshold = DateTime.UtcNow - window; + foreach (var item in items) + { + var title = NormalizeHeadline(item.Content?.Headline); + if (string.IsNullOrWhiteSpace(title)) continue; + + var date = GetComparableDate(item.Content); + if (date < threshold) continue; + + titleHistory.Add(title); + } + } + + private void AddTitlesToHistory(IEnumerable> hits, TimeSpan window, ISet? titleHistory) + { + if (titleHistory == null) return; + + var threshold = DateTime.UtcNow - window; + foreach (var hit in hits) + { + var title = NormalizeHeadline(hit.Source?.Headline); + if (string.IsNullOrWhiteSpace(title)) continue; + + var date = GetComparableDate(hit.Source); + if (date < threshold) continue; + + titleHistory.Add(title); + } + } + + private FolderContent[] FilterDuplicateTitlesWithinWindow(FolderContent[] items, TimeSpan window, ISet? titleHistory = null) + { + if (items.Length == 0) return items; + + var keep = new bool[items.Length]; + Array.Fill(keep, true); + var threshold = DateTime.UtcNow - window; + + var metadata = items.Select((item, index) => new + { + Item = item, + Index = index, + Title = NormalizeHeadline(item.Content?.Headline), + Date = GetComparableDate(item.Content) + }).ToArray(); + + if (titleHistory != null) + { + foreach (var entry in metadata) + { + if (string.IsNullOrWhiteSpace(entry.Title)) continue; + if (entry.Date < threshold) continue; + if (titleHistory.Contains(entry.Title)) + keep[entry.Index] = false; + } + } + + foreach (var group in metadata + .Where(x => !string.IsNullOrWhiteSpace(x.Title)) + .GroupBy(x => x.Title!, StringComparer.OrdinalIgnoreCase)) + { + var recent = group.Where(x => x.Date >= threshold).ToArray(); + if (recent.Length <= 1) continue; + + var latest = recent + .OrderByDescending(x => x.Date) + .ThenByDescending(x => x.Item.Content?.UpdatedOn ?? DateTime.MinValue) + .ThenByDescending(x => x.Item.ContentId) + .First(); + + foreach (var entry in recent) + { + if (entry.Index != latest.Index) + keep[entry.Index] = false; + } + } + + var results = new List(items.Length); + for (var i = 0; i < items.Length; i++) + { + if (keep[i]) + { + results.Add(items[i]); + if (titleHistory != null) + { + var title = metadata[i].Title; + if (!string.IsNullOrWhiteSpace(title) && metadata[i].Date >= threshold) + titleHistory.Add(title); + } + } + } + + return results.ToArray(); + } + + private Elastic.Models.HitModel[] FilterDuplicateTitlesWithinWindow( + IEnumerable> hits, + TimeSpan window, + ISet? titleHistory = null) + { + var hitArray = hits.ToArray(); + if (hitArray.Length == 0) return hitArray; + + var keep = new bool[hitArray.Length]; + Array.Fill(keep, true); + var threshold = DateTime.UtcNow - window; + + var metadata = hitArray.Select((hit, index) => new + { + Hit = hit, + Index = index, + Title = NormalizeHeadline(hit.Source?.Headline), + Date = GetComparableDate(hit.Source) + }).ToArray(); + + if (titleHistory != null) + { + foreach (var entry in metadata) + { + if (string.IsNullOrWhiteSpace(entry.Title)) continue; + if (entry.Date < threshold) continue; + if (titleHistory.Contains(entry.Title)) + keep[entry.Index] = false; + } + } + + foreach (var group in metadata + .Where(x => !string.IsNullOrWhiteSpace(x.Title)) + .GroupBy(x => x.Title!, StringComparer.OrdinalIgnoreCase)) + { + var recent = group.Where(x => x.Date >= threshold).ToArray(); + if (recent.Length <= 1) continue; + + var latest = recent + .OrderByDescending(x => x.Date) + .ThenByDescending(x => x.Hit.Source?.UpdatedOn ?? DateTime.MinValue) + .ThenByDescending(x => x.Hit.Source?.Id ?? 0) + .First(); + + foreach (var entry in recent) + { + if (entry.Index != latest.Index) + keep[entry.Index] = false; + } + } + + var results = new List>(hitArray.Length); + for (var i = 0; i < hitArray.Length; i++) + { + if (keep[i]) + { + results.Add(hitArray[i]); + if (titleHistory != null) + { + var title = metadata[i].Title; + if (!string.IsNullOrWhiteSpace(title) && metadata[i].Date >= threshold) + titleHistory.Add(title); + } + } + } + + return results.ToArray(); + } + + private List FilterDuplicateTitlesWithinWindow(List content, TimeSpan window, ISet? titleHistory = null) + { + if (content.Count == 0) return content; + + var keep = new bool[content.Count]; + Array.Fill(keep, true); + var threshold = DateTime.UtcNow - window; + + var metadata = content.Select((item, index) => new + { + Item = item, + Index = index, + Title = NormalizeHeadline(item.Content?.Headline), + Date = GetComparableDate(item.Content) + }).ToArray(); + + if (titleHistory != null) + { + foreach (var entry in metadata) + { + if (string.IsNullOrWhiteSpace(entry.Title)) continue; + if (entry.Date < threshold) continue; + if (titleHistory.Contains(entry.Title)) + keep[entry.Index] = false; + } + } + + foreach (var group in metadata + .Where(x => !string.IsNullOrWhiteSpace(x.Title)) + .GroupBy(x => x.Title!, StringComparer.OrdinalIgnoreCase)) + { + var recent = group.Where(x => x.Date >= threshold).ToArray(); + if (recent.Length <= 1) continue; + + var latest = recent + .OrderByDescending(x => x.Date) + .ThenByDescending(x => x.Item.Content?.UpdatedOn ?? DateTime.MinValue) + .ThenByDescending(x => x.Item.ContentId) + .First(); + + foreach (var entry in recent) + { + if (entry.Index != latest.Index) + keep[entry.Index] = false; + } + } + + var results = new List(content.Count); + for (var i = 0; i < content.Count; i++) + { + if (keep[i]) + { + results.Add(content[i]); + if (titleHistory != null) + { + var title = metadata[i].Title; + if (!string.IsNullOrWhiteSpace(title) && metadata[i].Date >= threshold) + titleHistory.Add(title); + } + } + } + + foreach (var sectionGroup in results.GroupBy(c => c.SectionName)) + { + var order = 0; + foreach (var item in sectionGroup.OrderBy(c => c.SortOrder)) + item.SortOrder = order++; + } + + return results; + } + + private static string? NormalizeHeadline(string? headline) + { + return string.IsNullOrWhiteSpace(headline) ? null : headline.Trim().ToUpperInvariant(); + } + + private static DateTime GetComparableDate(API.Areas.Services.Models.Content.ContentModel? content) + { + return content == null ? DateTime.MinValue : GetComparableDate(content.PublishedOn, content.PostedOn, content.UpdatedOn, content.CreatedOn); + } + + private static DateTime GetComparableDate(Content? content) + { + return content == null ? DateTime.MinValue : GetComparableDate(content.PublishedOn, content.PostedOn, content.UpdatedOn, content.CreatedOn); + } + + private static DateTime GetComparableDate(DateTime? publishedOn, DateTime? postedOn, DateTime? updatedOn, DateTime? createdOn) + { + return publishedOn ?? postedOn ?? updatedOn ?? createdOn ?? DateTime.MinValue; + } + /// /// Get the content from the current report instance for the specified 'reportId' and 'ownerId'. /// Including the 'ownerId' ensures the report the user generates is coupled with prior instances for the same user. diff --git a/libs/net/models/Settings/ReportContentSettingsModel.cs b/libs/net/models/Settings/ReportContentSettingsModel.cs index 39ee743648..3c9b5eeec2 100644 --- a/libs/net/models/Settings/ReportContentSettingsModel.cs +++ b/libs/net/models/Settings/ReportContentSettingsModel.cs @@ -12,6 +12,7 @@ public class ReportContentSettingsModel public IEnumerable ExcludeReports { get; set; } = Array.Empty(); public bool ShowLinkToStory { get; set; } public bool HighlightKeywords { get; set; } + public bool RemoveDuplicateTitles3Days { get; set; } /// /// get/set - Accumulate content on each run until sent. @@ -45,6 +46,7 @@ public ReportContentSettingsModel(Dictionary settings, JsonSeria this.ExcludeReports = settings.GetDictionaryJsonValue("excludeReports", Array.Empty(), options)!; this.ShowLinkToStory = settings.GetDictionaryJsonValue("showLinkToStory", false, options)!; this.HighlightKeywords = settings.GetDictionaryJsonValue("highlightKeywords", false, options)!; + this.RemoveDuplicateTitles3Days = settings.GetDictionaryJsonValue("removeDuplicateTitles3Days", false, options)!; this.CopyPriorInstance = settings.GetDictionaryJsonValue("copyPriorInstance", false, options)!; this.ClearOnStartNewReport = settings.GetDictionaryJsonValue("clearOnStartNewReport", false, options)!; this.ExcludeContentInUnsentReport = settings.GetDictionaryJsonValue("excludeContentInUnsentReport", true, options)!; diff --git a/libs/net/models/Settings/ReportSectionSettingsModel.cs b/libs/net/models/Settings/ReportSectionSettingsModel.cs index de0243c689..eb5ce367a5 100644 --- a/libs/net/models/Settings/ReportSectionSettingsModel.cs +++ b/libs/net/models/Settings/ReportSectionSettingsModel.cs @@ -16,6 +16,7 @@ public class ReportSectionSettingsModel public bool? CacheData { get; set; } public string Direction { get; set; } = ""; public bool RemoveDuplicates { get; set; } + public bool RemoveDuplicateTitles3Days { get; set; } public bool OverrideExcludeHistorical { get; set; } public bool? InTableOfContents { get; set; } public bool HideEmpty { get; set; } @@ -44,6 +45,7 @@ public ReportSectionSettingsModel(Dictionary settings, JsonSeria this.CacheData = settings.GetDictionaryJsonValue("cacheData", false, options); this.Direction = settings.GetDictionaryJsonValue("direction", "", options)!; this.RemoveDuplicates = settings.GetDictionaryJsonValue("removeDuplicates", false, options)!; + this.RemoveDuplicateTitles3Days = settings.GetDictionaryJsonValue("removeDuplicateTitles3Days", false, options)!; this.OverrideExcludeHistorical = settings.GetDictionaryJsonValue("overrideExcludeHistorical", false, options)!; this.InTableOfContents = settings.GetDictionaryJsonValue("inTableOfContents", null, options)!; this.HideEmpty = settings.GetDictionaryJsonValue("hideEmpty", false, options)!; @@ -69,6 +71,7 @@ public ReportSectionSettingsModel(JsonDocument settings, JsonSerializerOptions o this.CacheData = settings.GetElementValue("cacheData", false, options); this.Direction = settings.GetElementValue("direction", "", options)!; this.RemoveDuplicates = settings.GetElementValue("removeDuplicates", false, options)!; + this.RemoveDuplicateTitles3Days = settings.GetElementValue("removeDuplicateTitles3Days", false, options)!; this.OverrideExcludeHistorical = settings.GetElementValue("overrideExcludeHistorical", false, options)!; this.InTableOfContents = settings.GetElementValue("inTableOfContents", null, options)!; this.HideEmpty = settings.GetElementValue("hideEmpty", false, options)!; diff --git a/libs/npm/core/package.json b/libs/npm/core/package.json index b261d2eb11..9e019fb42e 100644 --- a/libs/npm/core/package.json +++ b/libs/npm/core/package.json @@ -1,7 +1,7 @@ { "name": "tno-core", "description": "TNO shared library", - "version": "1.0.27", + "version": "1.0.28", "homepage": "https://github.com/bcgov/tno", "license": "Apache-2.0", "files": [ diff --git a/libs/npm/core/src/hooks/api/interfaces/IReportContentSettingsModel.ts b/libs/npm/core/src/hooks/api/interfaces/IReportContentSettingsModel.ts index 0d8fa57254..3902ac3973 100644 --- a/libs/npm/core/src/hooks/api/interfaces/IReportContentSettingsModel.ts +++ b/libs/npm/core/src/hooks/api/interfaces/IReportContentSettingsModel.ts @@ -5,6 +5,7 @@ export interface IReportContentSettingsModel { excludeReports: number[]; showLinkToStory: boolean; highlightKeywords: boolean; + removeDuplicateTitles3Days: boolean; copyPriorInstance: boolean; clearOnStartNewReport: boolean; excludeContentInUnsentReport: boolean; diff --git a/libs/npm/core/src/hooks/api/interfaces/IReportSectionSettingsModel.ts b/libs/npm/core/src/hooks/api/interfaces/IReportSectionSettingsModel.ts index d7ef8ff725..64e0c3cca3 100644 --- a/libs/npm/core/src/hooks/api/interfaces/IReportSectionSettingsModel.ts +++ b/libs/npm/core/src/hooks/api/interfaces/IReportSectionSettingsModel.ts @@ -8,6 +8,7 @@ export interface IReportSectionSettingsModel { cacheData?: boolean; direction: 'row' | 'column'; removeDuplicates: boolean; + removeDuplicateTitles3Days: boolean; overrideExcludeHistorical: boolean; inTableOfContents?: boolean; hideEmpty: boolean;