Skip to content

Commit 5a61c4d

Browse files
committed
parent 3e15d8f
author Enkidu93 <[email protected]> 1765920252 -0500
committer Enkidu93 <[email protected]> 1766155017 -0500

Cap the number of warnings to 100; add project name to warning and reformat warning message for better readability; Only generate warnings for each file once

Add max warnings to build options; fix filtering logic; add tests

Fix version typo
1 parent 3e15d8f commit 5a61c4d

File tree

13 files changed

+116
-46
lines changed

13 files changed

+116
-46
lines changed

src/Machine/src/Serval.Machine.Shared/Configuration/BuildJobOptions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ public class BuildJobOptions
66

77
public IList<ClearMLBuildQueue> ClearML { get; set; } = new List<ClearMLBuildQueue>();
88
public bool PreserveBuildFiles { get; set; } = false;
9+
public int MaxWarnings { get; set; } = 1000;
910
}

src/Machine/src/Serval.Machine.Shared/Services/NmtPreprocessBuildJob.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ public class NmtPreprocessBuildJob(
88
IBuildJobService<TranslationEngine> buildJobService,
99
ISharedFileService sharedFileService,
1010
ILanguageTagService languageTagService,
11-
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
11+
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService,
12+
IOptionsMonitor<BuildJobOptions> options
1213
)
1314
: TranslationPreprocessBuildJob(
1415
platformService,
@@ -17,7 +18,8 @@ IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
1718
logger,
1819
buildJobService,
1920
sharedFileService,
20-
parallelCorpusPreprocessingService
21+
parallelCorpusPreprocessingService,
22+
options
2123
)
2224
{
2325
private readonly ILanguageTagService _languageTagService = languageTagService;
@@ -87,6 +89,14 @@ CancellationToken cancellationToken
8789
corpora
8890
);
8991

92+
int maxWarnings = BuildJobOptions.MaxWarnings;
93+
if (warnings.Count > maxWarnings)
94+
{
95+
string tooManyWarningsWarning =
96+
$"There were {warnings.Count} warnings. Only the first {maxWarnings} are shown.";
97+
warnings = [tooManyWarningsWarning, .. warnings.Take(maxWarnings)];
98+
}
99+
90100
// Log summary of build data
91101
JsonObject buildPreprocessSummary =
92102
new()

src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ public abstract class PreprocessBuildJob<TEngine>(
77
ILogger<PreprocessBuildJob<TEngine>> logger,
88
IBuildJobService<TEngine> buildJobService,
99
ISharedFileService sharedFileService,
10-
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
10+
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService,
11+
IOptionsMonitor<BuildJobOptions> options
1112
)
1213
: HangfireBuildJob<TEngine, IReadOnlyList<ParallelCorpus>>(
1314
platformService,
@@ -24,7 +25,7 @@ IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
2425
new() { Indented = true, Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping };
2526

2627
internal BuildJobRunnerType TrainJobRunnerType { get; init; } = BuildJobRunnerType.ClearML;
27-
28+
protected readonly BuildJobOptions BuildJobOptions = options.CurrentValue;
2829
protected readonly ISharedFileService SharedFileService = sharedFileService;
2930
protected readonly IParallelCorpusPreprocessingService ParallelCorpusPreprocessingService =
3031
parallelCorpusPreprocessingService;
@@ -148,7 +149,7 @@ IReadOnlyList<ParallelCorpus> corpora
148149
foreach (UsfmVersificationError error in errors)
149150
{
150151
warnings.Add(
151-
$"USFM does not match project versification for parallel corpus {parallelCorpus.Id}, monolingual corpus {monolingualCorpusId}: Expected verse {error.ExpectedVerseRef}, Actual verse {error.ActualVerseRef}, Mismatch type {error.Type}"
152+
$"USFM versification error in project {error.ProjectName}, expected verse “{error.ExpectedVerseRef}”, actual verse “{error.ActualVerseRef}”, mismatch type {error.Type} (parallel corpus {parallelCorpus.Id}, monolingual corpus {monolingualCorpusId})"
152153
);
153154
}
154155
}

src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ public class SmtTransferPreprocessBuildJob(
99
ISharedFileService sharedFileService,
1010
IDistributedReaderWriterLockFactory lockFactory,
1111
IRepository<TrainSegmentPair> trainSegmentPairs,
12-
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
12+
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService,
13+
IOptionsMonitor<BuildJobOptions> options
1314
)
1415
: TranslationPreprocessBuildJob(
1516
platformService,
@@ -18,7 +19,8 @@ IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
1819
logger,
1920
buildJobService,
2021
sharedFileService,
21-
parallelCorpusPreprocessingService
22+
parallelCorpusPreprocessingService,
23+
options
2224
)
2325
{
2426
private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory;

src/Machine/src/Serval.Machine.Shared/Services/TranslationPreprocessBuildJob.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ public class TranslationPreprocessBuildJob(
77
ILogger<PreprocessBuildJob<TranslationEngine>> logger,
88
IBuildJobService<TranslationEngine> buildJobService,
99
ISharedFileService sharedFileService,
10-
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
10+
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService,
11+
IOptionsMonitor<BuildJobOptions> options
1112
)
1213
: PreprocessBuildJob<TranslationEngine>(
1314
platformService,
@@ -16,7 +17,8 @@ IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
1617
logger,
1718
buildJobService,
1819
sharedFileService,
19-
parallelCorpusPreprocessingService
20+
parallelCorpusPreprocessingService,
21+
options
2022
)
2123
{
2224
protected override async Task<(int TrainCount, int InferenceCount)> WriteDataFilesAsync(

src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ public class WordAlignmentPreprocessBuildJob(
77
ILogger<WordAlignmentPreprocessBuildJob> logger,
88
IBuildJobService<WordAlignmentEngine> buildJobService,
99
ISharedFileService sharedFileService,
10-
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
10+
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService,
11+
IOptionsMonitor<BuildJobOptions> options
1112
)
1213
: PreprocessBuildJob<WordAlignmentEngine>(
1314
platformService,
@@ -16,7 +17,8 @@ IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
1617
logger,
1718
buildJobService,
1819
sharedFileService,
19-
parallelCorpusPreprocessingService
20+
parallelCorpusPreprocessingService,
21+
options
2022
)
2123
{
2224
protected override async Task<(int TrainCount, int InferenceCount)> WriteDataFilesAsync(

src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ public TestEnvironment()
146146
.When(x => x.StopTaskAsync("job1", Arg.Any<CancellationToken>()))
147147
.Do(_ => _cancellationTokenSource.Cancel());
148148
SharedFileService = new SharedFileService(Substitute.For<ILoggerFactory>());
149-
var buildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
150-
buildJobOptions.CurrentValue.Returns(
149+
BuildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
150+
BuildJobOptions.CurrentValue.Returns(
151151
new BuildJobOptions
152152
{
153153
ClearML =
@@ -181,7 +181,7 @@ public TestEnvironment()
181181
Engines
182182
)
183183
],
184-
buildJobOptions
184+
BuildJobOptions
185185
)
186186
],
187187
Engines
@@ -193,7 +193,7 @@ public TestEnvironment()
193193
ClearMLService,
194194
SharedFileService,
195195
clearMLOptions,
196-
buildJobOptions,
196+
BuildJobOptions,
197197
Substitute.For<ILogger<ClearMLMonitorService>>()
198198
);
199199
_jobServer = CreateJobServer();
@@ -207,6 +207,7 @@ public TestEnvironment()
207207
public IClearMLService ClearMLService { get; }
208208
public ISharedFileService SharedFileService { get; }
209209
public IBuildJobService<TranslationEngine> BuildJobService { get; }
210+
public IOptionsMonitor<BuildJobOptions> BuildJobOptions { get; }
210211

211212
public void PersistModel()
212213
{
@@ -329,7 +330,8 @@ public override object ActivateJob(Type jobType)
329330
_env.BuildJobService,
330331
_env.SharedFileService,
331332
new LanguageTagService(),
332-
new ParallelCorpusPreprocessingService(new TextCorpusService())
333+
new ParallelCorpusPreprocessingService(new TextCorpusService()),
334+
_env.BuildJobOptions
333335
);
334336
}
335337
if (jobType == typeof(TranslationPostprocessBuildJob))
@@ -343,7 +345,7 @@ public override object ActivateJob(Type jobType)
343345
_env.BuildJobService,
344346
Substitute.For<ILogger<TranslationPostprocessBuildJob>>(),
345347
_env.SharedFileService,
346-
buildJobOptions
348+
_env.BuildJobOptions
347349
);
348350
}
349351
return base.ActivateJob(jobType);

src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ public async Task RunAsync_BuildWarnings()
130130

131131
await env.RunBuildJobAsync(corpus1, useKeyTerms: true);
132132
Assert.That(env.ExecutionData.Warnings, Has.Count.EqualTo(8));
133+
134+
env.BuildJobOptions.CurrentValue.Returns(new BuildJobOptions() { MaxWarnings = 2 });
135+
await env.RunBuildJobAsync(corpus1, useKeyTerms: true);
136+
// Two warnings after truncation + one warning mentioning that warnings were truncated
137+
Assert.That(env.ExecutionData.Warnings, Has.Count.EqualTo(3));
133138
}
134139

135140
[Test]
@@ -474,6 +479,11 @@ public async Task ParallelCorpusAsync()
474479
pretranslations[2]!["translation"]!.ToString(),
475480
Is.EqualTo("Source one, chapter twelve, verse one.")
476481
);
482+
Assert.That(
483+
env.ExecutionData.Warnings,
484+
Has.Count.EqualTo(16),
485+
JsonSerializer.Serialize(env.ExecutionData.Warnings)
486+
);
477487
});
478488
}
479489

@@ -794,7 +804,8 @@ public PreprocessBuildJob<TranslationEngine> GetBuildJob(EngineType engineType)
794804
BuildJobService,
795805
SharedFileService,
796806
new LanguageTagService(),
797-
new ParallelCorpusPreprocessingService(TextCorpusService)
807+
new ParallelCorpusPreprocessingService(TextCorpusService),
808+
BuildJobOptions
798809
);
799810
}
800811
case EngineType.SmtTransfer:
@@ -808,7 +819,8 @@ public PreprocessBuildJob<TranslationEngine> GetBuildJob(EngineType engineType)
808819
SharedFileService,
809820
LockFactory,
810821
TrainSegmentPairs,
811-
new ParallelCorpusPreprocessingService(TextCorpusService)
822+
new ParallelCorpusPreprocessingService(TextCorpusService),
823+
BuildJobOptions
812824
);
813825
}
814826
default:

src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,8 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
288288
SharedFileService = new SharedFileService(Substitute.For<ILoggerFactory>());
289289
var clearMLOptions = Substitute.For<IOptionsMonitor<ClearMLOptions>>();
290290
clearMLOptions.CurrentValue.Returns(new ClearMLOptions());
291-
var buildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
292-
buildJobOptions.CurrentValue.Returns(
291+
BuildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
292+
BuildJobOptions.CurrentValue.Returns(
293293
new BuildJobOptions
294294
{
295295
ClearML =
@@ -335,7 +335,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
335335
ClearMLService,
336336
SharedFileService,
337337
clearMLOptions,
338-
buildJobOptions,
338+
BuildJobOptions,
339339
Substitute.For<ILogger<ClearMLMonitorService>>()
340340
);
341341
BuildJobService = new BuildJobService<TranslationEngine>(
@@ -344,7 +344,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
344344
new ClearMLBuildJobRunner(
345345
ClearMLService,
346346
[new SmtTransferClearMLBuildJobFactory(SharedFileService, Engines)],
347-
buildJobOptions
347+
BuildJobOptions
348348
)
349349
],
350350
Engines
@@ -365,6 +365,7 @@ [new SmtTransferClearMLBuildJobFactory(SharedFileService, Engines)],
365365
public ITruecaser Truecaser { get; }
366366
public ITrainer TruecaserTrainer { get; }
367367
public IPlatformService PlatformService { get; }
368+
public IOptionsMonitor<BuildJobOptions> BuildJobOptions { get; }
368369

369370
public IClearMLService ClearMLService { get; }
370371
public IClearMLQueueService ClearMLMonitorService { get; }
@@ -708,7 +709,8 @@ public override object ActivateJob(Type jobType)
708709
_env.SharedFileService,
709710
_env._lockFactory,
710711
_env.TrainSegmentPairs,
711-
new ParallelCorpusPreprocessingService(new TextCorpusService())
712+
new ParallelCorpusPreprocessingService(new TextCorpusService()),
713+
_env.BuildJobOptions
712714
)
713715
{
714716
TrainJobRunnerType = _env._trainJobRunnerType
@@ -718,8 +720,6 @@ public override object ActivateJob(Type jobType)
718720
{
719721
var engineOptions = Substitute.For<IOptionsMonitor<SmtTransferEngineOptions>>();
720722
engineOptions.CurrentValue.Returns(new SmtTransferEngineOptions());
721-
var buildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
722-
buildJobOptions.CurrentValue.Returns(new BuildJobOptions());
723723
return new SmtTransferPostprocessBuildJob(
724724
_env.PlatformService,
725725
_env.Engines,
@@ -731,7 +731,7 @@ public override object ActivateJob(Type jobType)
731731
_env.TrainSegmentPairs,
732732
_env.SmtModelFactory,
733733
_env._truecaserFactory,
734-
buildJobOptions,
734+
_env.BuildJobOptions,
735735
engineOptions
736736
);
737737
}

src/Machine/test/Serval.Machine.Shared.Tests/Services/StatisticalEngineServiceTests.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,8 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
179179
SharedFileService = new SharedFileService(Substitute.For<ILoggerFactory>());
180180
var clearMLOptions = Substitute.For<IOptionsMonitor<ClearMLOptions>>();
181181
clearMLOptions.CurrentValue.Returns(new ClearMLOptions());
182-
var buildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
183-
buildJobOptions.CurrentValue.Returns(
182+
BuildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
183+
BuildJobOptions.CurrentValue.Returns(
184184
new BuildJobOptions
185185
{
186186
ClearML =
@@ -219,7 +219,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
219219
ClearMLService,
220220
SharedFileService,
221221
clearMLOptions,
222-
buildJobOptions,
222+
BuildJobOptions,
223223
Substitute.For<ILogger<ClearMLMonitorService>>()
224224
);
225225
BuildJobService = new BuildJobService<WordAlignmentEngine>(
@@ -228,7 +228,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
228228
new ClearMLBuildJobRunner(
229229
ClearMLService,
230230
[new StatisticalClearMLBuildJobFactory(SharedFileService, Engines)],
231-
buildJobOptions
231+
BuildJobOptions
232232
)
233233
],
234234
Engines
@@ -252,6 +252,7 @@ [new StatisticalClearMLBuildJobFactory(SharedFileService, Engines)],
252252
public ISharedFileService SharedFileService { get; }
253253

254254
public IBuildJobService<WordAlignmentEngine> BuildJobService { get; }
255+
public IOptionsMonitor<BuildJobOptions> BuildJobOptions { get; }
255256

256257
public async Task CommitAsync(TimeSpan inactiveTimeout)
257258
{
@@ -455,7 +456,8 @@ public override object ActivateJob(Type jobType)
455456
Substitute.For<ILogger<WordAlignmentPreprocessBuildJob>>(),
456457
_env.BuildJobService,
457458
_env.SharedFileService,
458-
new ParallelCorpusPreprocessingService(new TextCorpusService())
459+
new ParallelCorpusPreprocessingService(new TextCorpusService()),
460+
_env.BuildJobOptions
459461
)
460462
{
461463
TrainJobRunnerType = _env._trainJobRunnerType
@@ -465,8 +467,6 @@ public override object ActivateJob(Type jobType)
465467
{
466468
var engineOptions = Substitute.For<IOptionsMonitor<StatisticalEngineOptions>>();
467469
engineOptions.CurrentValue.Returns(new StatisticalEngineOptions());
468-
var buildJobOptions = Substitute.For<IOptionsMonitor<BuildJobOptions>>();
469-
buildJobOptions.CurrentValue.Returns(new BuildJobOptions());
470470
return new StatisticalPostprocessBuildJob(
471471
_env.PlatformService,
472472
_env.Engines,
@@ -476,7 +476,7 @@ public override object ActivateJob(Type jobType)
476476
_env.SharedFileService,
477477
_env._lockFactory,
478478
_env.WordAlignmentModelFactory,
479-
buildJobOptions,
479+
_env.BuildJobOptions,
480480
engineOptions
481481
);
482482
}

0 commit comments

Comments
 (0)