|
| 1 | +// Copyright (c) Microsoft Corporation. |
| 2 | +// Licensed under the MIT License. |
| 3 | + |
| 4 | +namespace VirtualClient.Actions |
| 5 | +{ |
| 6 | + using System; |
| 7 | + using System.Collections.Generic; |
| 8 | + using System.Diagnostics; |
| 9 | + using System.IO; |
| 10 | + using System.Linq; |
| 11 | + using System.Reflection; |
| 12 | + using System.Threading; |
| 13 | + using System.Threading.Tasks; |
| 14 | + using Microsoft.Extensions.DependencyInjection; |
| 15 | + using Moq; |
| 16 | + using Newtonsoft.Json; |
| 17 | + using Newtonsoft.Json.Linq; |
| 18 | + using NUnit.Framework; |
| 19 | + using Polly; |
| 20 | + using VirtualClient.Common; |
| 21 | + using VirtualClient.Common.Contracts; |
| 22 | + using VirtualClient.Common.Telemetry; |
| 23 | + using VirtualClient.Contracts; |
| 24 | + |
/// <summary>
/// Unit tests for <see cref="MLPerfTrainingExecutor"/>. The tests capture every process the
/// executor creates through the mock process manager and compare the resulting command lines
/// against the expected sequence.
/// </summary>
[TestFixture]
[Category("Unit")]
public class MLPerfTrainingExecutorTests
{
    private DependencyFixture mockFixture;
    private DependencyPath mockPackage;
    private IEnumerable<Disk> disks;
    private string output;
    private List<string> commandsExecuted = new List<string>();

    /// <summary>
    /// Resets the fixture, parameters and captured commands before each test.
    /// </summary>
    [SetUp]
    public void SetupTests()
    {
        // SetupDefaultMockBehavior constructs the fixture itself, so it is not created here as well.
        this.SetupDefaultMockBehavior(PlatformID.Unix);
    }

    /// <summary>
    /// Verifies that initialization issues exactly the expected docker setup commands, in order.
    /// </summary>
    [Test]
    public async Task MLPerfTrainingExecutorInitializesWorkloadAsExpected()
    {
        List<string> expectedCommands = MLPerfTrainingExecutorTests.GetExpectedInitializationCommands();

        using (TestMLPerfTrainingExecutor executor = new TestMLPerfTrainingExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters))
        {
            await executor.InitializeAsync(EventContext.None, CancellationToken.None).ConfigureAwait(false);
        }

        CollectionAssert.AreEqual(expectedCommands, this.commandsExecuted);
    }

    /// <summary>
    /// Verifies that initialization followed by execution issues the expected commands, in order.
    /// </summary>
    [Test]
    public async Task MLPerfTrainingExecutorExecutesAsExpected()
    {
        IEnumerable<string> expectedCommands = this.GetExpectedCommands();

        using (TestMLPerfTrainingExecutor executor = new TestMLPerfTrainingExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters))
        {
            await executor.InitializeAsync(EventContext.None, CancellationToken.None).ConfigureAwait(false);
            await executor.ExecuteAsync(EventContext.None, CancellationToken.None).ConfigureAwait(false);
        }

        CollectionAssert.AreEqual(expectedCommands.ToArray(), this.commandsExecuted);
    }

    /// <summary>
    /// Builds the command list expected from initialization alone.
    /// </summary>
    /// <returns>A new, mutable list containing the initialization commands in order.</returns>
    private static List<string> GetExpectedInitializationCommands()
    {
        return new List<string>
        {
            "sudo usermod -aG docker anyuser",
            "sudo docker build --pull -t mlperf-training-anyuser-x86_64:language_model .",
            "sudo docker run --runtime=nvidia mlperf-training-anyuser-x86_64:language_model"
        };
    }

    /// <summary>
    /// Creates a fresh fixture for the given platform, registers disks and the workload package,
    /// sets the executor parameters, loads the example output file and hooks process creation so
    /// that each command line is recorded.
    /// </summary>
    /// <param name="platformID">The platform the mock fixture and disks are set up for.</param>
    private void SetupDefaultMockBehavior(PlatformID platformID)
    {
        this.commandsExecuted = new List<string>();
        this.mockFixture = new DependencyFixture();
        this.mockFixture.Setup(platformID);
        this.mockPackage = new DependencyPath("MLPerfTraining", this.mockFixture.PlatformSpecifics.GetPackagePath("mlperf"));

        // Create the disks for the same platform the fixture was set up with.
        this.disks = this.mockFixture.CreateDisks(platformID, true);
        this.mockFixture.DiskManager.AddRange(this.disks);

        // NOTE(review): the expected file below is a Windows DiskSpd path, which looks like a
        // leftover from another workload's tests - confirm whether it is needed here.
        this.mockFixture.SetupWorkloadPackage("mlperftraining", expectedFiles: @"win-x64\diskspd.exe");

        this.mockFixture.Parameters = new Dictionary<string, IConvertible>()
        {
            { nameof(MLPerfTrainingExecutor.Username), "anyuser" },
            { nameof(MLPerfTrainingExecutor.Model), "bert" },
            { nameof(MLPerfTrainingExecutor.BatchSize), "45" },
            { nameof(MLPerfTrainingExecutor.Implementation), "pytorch-22.09" },
            { nameof(MLPerfTrainingExecutor.ContainerName), "language_model" },
            { nameof(MLPerfTrainingExecutor.DataPath), "mlperf-training-data-bert.1.0.0" },
            { nameof(MLPerfTrainingExecutor.GPUCount), "8" },
            { nameof(MLPerfTrainingExecutor.Scenario), "training-mlperf-bert-batchsize-45-gpu-8" },
            { nameof(MLPerfTrainingExecutor.ConfigFile), "config_DGXA100_1x8x56x1.sh" },
            { nameof(MLPerfTrainingExecutor.PackageName), "mlperftraining" }
        };

        // Pass each path segment separately so the platform's own directory separator is used;
        // a hard-coded backslash path does not resolve when the tests run on Linux or macOS.
        string workingDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        string outputPath = Path.Combine(workingDirectory, "Examples", "MLPerfTraining", "Example_bert_real_output.txt");
        this.output = File.ReadAllText(outputPath);

        this.mockFixture.ProcessManager.OnCreateProcess = (command, arguments, workingDir) =>
        {
            IProcessProxy process = this.mockFixture.CreateProcess(command, arguments, workingDir);

            // Record "<command> <arguments>" (trimmed) so tests can assert on the exact sequence.
            this.commandsExecuted.Add($"{command} {arguments}".Trim());

            // Every mocked process reports the example output on its standard output.
            process.StandardOutput.Append(this.output);

            return process;
        };
    }

    /// <summary>
    /// Builds the command list expected from initialization followed by execution.
    /// </summary>
    /// <returns>The initialization commands followed by the training run command.</returns>
    private IEnumerable<string> GetExpectedCommands()
    {
        List<string> commands = MLPerfTrainingExecutorTests.GetExpectedInitializationCommands();

        // One single command line, split across literals only for readability.
        commands.Add(
            "sudo su -c \"source config_DGXA100_1x8x56x1.sh; "
            + "env BATCHSIZE=45 DGXNGPU=8 CUDA_VISIBLE_DEVICES=\"0,1,2,3,4,5,6,7\" "
            + "CONT=mlperf-training-anyuser-x86_64:language_model "
            + "DATADIR=/mlperftraining0/mlperf-training-data-bert.1.0.0/mlperf-training-package/hdf5/training-4320 "
            + "DATADIR_PHASE2=/mlperftraining0/mlperf-training-data-bert.1.0.0/mlperf-training-package/hdf5/training-4320 "
            + "EVALDIR=/mlperftraining0/mlperf-training-data-bert.1.0.0/mlperf-training-package/hdf5/eval_varlength "
            + "CHECKPOINTDIR=/mlperftraining0/mlperf-training-data-bert.1.0.0/mlperf-training-package/phase1 "
            + "CHECKPOINTDIR_PHASE1=/mlperftraining0/mlperf-training-data-bert.1.0.0/mlperf-training-package/phase1 "
            + "./run_with_docker.sh\"");

        return commands;
    }

    /// <summary>
    /// Test double that re-declares selected base members as public so that the tests can
    /// invoke them directly. Each member simply forwards to the base implementation.
    /// </summary>
    protected class TestMLPerfTrainingExecutor : MLPerfTrainingExecutor
    {
        /// <summary>
        /// Initializes the test executor with the supplied dependencies and parameters.
        /// </summary>
        /// <param name="dependencies">Service collection passed to the base constructor.</param>
        /// <param name="parameters">Executor parameters passed to the base constructor.</param>
        public TestMLPerfTrainingExecutor(IServiceCollection dependencies, IDictionary<string, IConvertible> parameters)
            : base(dependencies, parameters)
        {
        }

        /// <summary>
        /// Forwards to the base ExecuteAsync implementation.
        /// </summary>
        public new Task ExecuteAsync(EventContext context, CancellationToken cancellationToken)
        {
            return base.ExecuteAsync(context, cancellationToken);
        }

        /// <summary>
        /// Forwards to the base InitializeAsync implementation.
        /// </summary>
        public new Task InitializeAsync(EventContext context, CancellationToken cancellationToken)
        {
            return base.InitializeAsync(context, cancellationToken);
        }

        /// <summary>
        /// Forwards to the base GetContainerName implementation.
        /// </summary>
        public new string GetContainerName()
        {
            return base.GetContainerName();
        }
    }
}
| 150 | +} |
0 commit comments