diff --git a/.github/workflows/reset-branch.yml b/.github/workflows/reset-branch.yml
index 76cf0b97eb..9eaf944c6e 100644
--- a/.github/workflows/reset-branch.yml
+++ b/.github/workflows/reset-branch.yml
@@ -1,5 +1,4 @@
 name: Reset Current Branch to Upstream After Squash Merge
-
 on:
   workflow_dispatch:
     inputs:
diff --git a/README.md b/README.md
index 58d2b7c14a..161da09a9e 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,31 @@ Please see the [MLPerf Inference benchmark paper](https://arxiv.org/abs/1911.025
 
 Please see [here](https://docs.mlcommons.org/inference/benchmarks/) for the MLPerf inference documentation website which includes automated commands to run MLPerf inference benchmarks using different implementations.
 
+## MLPerf Inference v5.0 (submission deadline February 28, 2025)
+
+For submissions, please use the master branch and any commit since the [5.0 seed release](https://github.com/mlcommons/inference/commit/5d83ed5de438ffb55bca4cdb2966fba90a9dbca6), although it is best to use the latest commit in the [master branch](https://github.com/mlcommons/inference).
+
+For power submissions, please use [SPEC PTD 1.11.1](https://github.com/mlcommons/power) (needs special access) and any commit of the power-dev repository after the [code-freeze](https://github.com/mlcommons/power-dev/commit/65eedd4a60b5c50ac44cbae061d2a428e9fb190a).
+
+
+| model | reference app | framework | dataset | category |
+| ---- | ---- | ---- | ---- | ---- |
+| resnet50-v1.5 | [vision/classification_and_detection](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection) | tensorflow, onnx, tvm, ncnn | imagenet2012 | edge,datacenter |
+| retinanet 800x800 | [vision/classification_and_detection](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection) | pytorch, onnx | openimages resized to 800x800 | edge,datacenter |
+| bert | [language/bert](https://github.com/mlcommons/inference/tree/master/language/bert) | tensorflow, pytorch, onnx | squad-1.1 | edge |
+| dlrm-v2 | [recommendation/dlrm_v2](https://github.com/mlcommons/inference/tree/master/recommendation/dlrm_v2/pytorch) | pytorch | Multihot Criteo Terabyte | datacenter |
+| 3d-unet | [vision/medical_imaging/3d-unet-kits19](https://github.com/mlcommons/inference/tree/master/vision/medical_imaging/3d-unet-kits19) | pytorch, tensorflow, onnx | KiTS19 | edge,datacenter |
+| gpt-j | [language/gpt-j](https://github.com/mlcommons/inference/tree/master/language/gpt-j) | pytorch | CNN-Daily Mail | edge,datacenter |
+| stable-diffusion-xl | [text_to_image](https://github.com/mlcommons/inference/tree/master/text_to_image) | pytorch | COCO 2014 | edge,datacenter |
+| llama2-70b | [language/llama2-70b](https://github.com/mlcommons/inference/tree/master/language/llama2-70b) | pytorch | OpenOrca | datacenter |
+| llama3.1-405b | [language/llama3.1-405b](https://github.com/mlcommons/inference/tree/master/language/llama3.1-405b) | pytorch | LongBench, LongDataCollections, Ruler, GovReport | datacenter |
+| mixtral-8x7b | [language/mixtral-8x7b](https://github.com/mlcommons/inference/tree/master/language/mixtral-8x7b) | pytorch | OpenOrca, MBXP, GSM8K | datacenter |
+| rgat | [graph/rgat](https://github.com/mlcommons/inference/tree/master/graph/R-GAT) | pytorch | IGBH | datacenter |
+| pointpainting | [automotive/3d-object-detection](https://github.com/mlcommons/inference/tree/master/automotive/3d-object-detection) | pytorch, onnx | Waymo Open Dataset | edge |
+
+* The frameworks listed are those used by the reference implementation.
Submitters are free to use their own frameworks to run the benchmark.
+
+
 ## MLPerf Inference v4.1 (submission deadline July 26, 2024)
 
 For submissions, please use the master branch and any commit since the [4.1 seed release](https://github.com/mlcommons/inference/pull/1736/files) although it is best to use the latest commit. v4.1 tag will be created from the master branch after the result publication.
diff --git a/automotive/3d-object-detection/main.py b/automotive/3d-object-detection/main.py
index caafd9e47c..b66a1878d1 100644
--- a/automotive/3d-object-detection/main.py
+++ b/automotive/3d-object-detection/main.py
@@ -111,10 +111,6 @@ def get_args():
         help="device to run the benchmark",
     )
 
-    # file to use mlperf rules compliant parameters
-    parser.add_argument(
-        "--mlperf_conf", default="mlperf.conf", help="mlperf rules config"
-    )
     # file for user LoadGen settings such as target QPS
     parser.add_argument(
         "--user_conf",
@@ -350,11 +346,6 @@ def main():
         "cmdline": str(args),
     }
 
-    mlperf_conf = os.path.abspath(args.mlperf_conf)
-    if not os.path.exists(mlperf_conf):
-        log.error("{} not found".format(mlperf_conf))
-        sys.exit(1)
-
     user_conf = os.path.abspath(args.user_conf)
     if not os.path.exists(user_conf):
         log.error("{} not found".format(user_conf))
diff --git a/compliance/nvidia/README.md b/compliance/nvidia/README.md
index bcf050a99b..ed90893704 100755
--- a/compliance/nvidia/README.md
+++ b/compliance/nvidia/README.md
@@ -41,3 +41,4 @@ The `run_verification.py` found in each test directory will copy the test files
 | Llama3.1-405b | [TEST06](./TEST06/) |
 | mixtral-8x7b | [TEST06](./TEST06/) |
 | R-GAT | [TEST01](./TEST01/) |
+| PointPainting | [TEST01](./TEST01/) [TEST04](./TEST04/) |
diff --git a/compliance/nvidia/TEST01/pointpainting/audit.config b/compliance/nvidia/TEST01/pointpainting/audit.config
new file mode 100644
index 0000000000..03e70a4c7a
--- /dev/null
+++ b/compliance/nvidia/TEST01/pointpainting/audit.config
@@ -0,0 +1,9 @@
+# The format of this config file is 'key = value'.
+# The key has the format 'model.scenario.key'. Value is mostly int64_t.
+# Model maybe '*' as wildcard. In that case the value applies to all models.
+# All times are in milli seconds
+
+# mode dictionary (0 = submission, 1 = accuracy, 2 = performance, 3 = find peak perf)
+*.*.mode = 2
+*.*.accuracy_log_rng_seed = 720381539243781796
+*.*.accuracy_log_sampling_target = 256
diff --git a/language/mixtral-8x7b/SUT.py b/language/mixtral-8x7b/SUT.py
index e12a4318fa..36cf7e3c01 100644
--- a/language/mixtral-8x7b/SUT.py
+++ b/language/mixtral-8x7b/SUT.py
@@ -30,11 +30,12 @@ log = logging.getLogger("Mixtral-8x7B-Instruct-v0.1")
 
 
 gen_kwargs = {
-    "early_stopping": True,
-    "max_new_tokens": 1024,
+    # "min_new_tokens": 1,
     "min_new_tokens": 2,
-    "num_beams": 1,
+    "max_new_tokens": 1024,
     "do_sample": False,
+    "temperature": None,
+    "top_p": None,
 }
 
 
@@ -238,80 +239,32 @@ def process_queries(self):
             input_masks_tensor = []
             input_len = []
             input_dataset = []
+            batch_texts = []
+            datasets = []
             for q in qitem:
-                input_ids_tensor.append(
-                    pad(
-                        self.data_object.input_ids[q.index],
-                        (
-                            max_seq_len
-                            - self.data_object.input_lens[q.index],
-                            0,
-                            0,
-                            0,
-                        ),
-                        value=self.tokenizer.pad_token_id,
-                    )
-                )
-                input_masks_tensor.append(
-                    pad(
-                        self.data_object.attention_masks[q.index],
-                        (
-                            max_seq_len
-                            - self.data_object.input_lens[q.index],
-                            0,
-                            0,
-                            0,
-                        ),
-                        value=0,
-                    )
-                )
+                batch_texts.append(self.data_object.input_texts[q.index])
                 input_len.append(self.data_object.input_lens[q.index])
-
                 # In case we predict code generation, we can specify an
                 # additional stop sequence
                 input_dataset.append(
                     self.data_object.dataset_names[q.index])
 
-            input_ids_tensor = torch.cat(input_ids_tensor)
-            input_masks_tensor = torch.cat(input_masks_tensor)
-            assert input_ids_tensor.shape == input_masks_tensor.shape
-            assert input_ids_tensor.shape[0] <= self.batch_size
+            batch_ids = self.tokenizer.batch_encode_plus(
+                batch_texts, return_tensors="pt", padding=True)
+            batch_ids = batch_ids.to(self.device)
 
             tik2 = time.time()
-            logits_processor = LogitsProcessorList(
-                [StopAfterSequence(
-                    self.tokenizer.eos_token_id, device=self.device)]
-            )
-            for i in range(len(input_ids_tensor)):
-                ids, masks, dataset = (
-                    input_ids_tensor[i: i + 1],
-                    input_masks_tensor[i: i + 1],
-                    input_dataset[i],
-                )
-                pred_output_tokens = []
-                if dataset == "MBXP":
-                    out = self.model.generate(
-                        input_ids=ids,
-                        attention_mask=masks,
-                        pad_token_id=self.tokenizer.pad_token_id,
-                        logits_processor=logits_processor,
-                        **gen_kwargs,
-                    )
-                else:
-                    out = self.model.generate(
-                        input_ids=ids,
-                        attention_mask=masks,
-                        pad_token_id=self.tokenizer.pad_token_id,
-                        **gen_kwargs,
-                    )
-                pred_output_tokens.append(out)
-            pred_output_tokens = torch.cat(pred_output_tokens)
+            _, length = batch_ids.input_ids.shape
+            out = self.model.generate(
+                **batch_ids, num_return_sequences=1, **gen_kwargs)
+            pred_output_tokens = out
 
             tik3 = time.time()
 
             processed_output = self.data_object.postProcess(
                 pred_output_tokens,
-                input_seq_lens=input_len,
+                length=length,
                 query_id_list=query_ids,
+                dataset_list=input_dataset,
             )
             for i in range(len(qitem)):
@@ -342,10 +295,7 @@ def process_queries(self):
 
     def load_model(self):
         self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_path,
-            device_map="auto",
-            low_cpu_mem_usage=True,
-            torch_dtype=self.amp_dtype,
+            self.model_path, device_map="auto", trust_remote_code=True
         )
         print("Loaded model")
 
@@ -362,10 +312,7 @@ def load_model(self):
             pass
 
         self.tokenizer = AutoTokenizer.from_pretrained(
-            self.model_path,
-            model_max_length=1024,
-            padding_side="left",
-            use_fast=False,
+            self.model_path, padding_side="left", trust_remote_code=True
         )
 
         self.tokenizer.pad_token = self.tokenizer.eos_token
diff --git a/language/mixtral-8x7b/dataset.py b/language/mixtral-8x7b/dataset.py
index c8268d8d1d..34b965e68d 100644
--- a/language/mixtral-8x7b/dataset.py
+++ b/language/mixtral-8x7b/dataset.py
@@ -67,6 +67,7 @@ def load_processed_dataset(self):
         processed_data = pd.read_pickle(self.dataset_path)
 
         input_tokens = processed_data["tok_input"]
+        self.input_texts = processed_data["input"].to_list()
 
         self.input_ids = []
         self.input_lens = []
@@ -85,12 +86,28 @@
             self.dataset_names.append(dataset)
         print("Finished loading dataset.")
 
+    def remove_trailing_twos(self, lst, eos=2):
+        count = 0
+        for num in reversed(lst):
+            if num == eos or num == 0:
+                count += 1
+            else:
+                break
+        return lst[:-count] if count > 0 else lst
+
+    def mbxp_stop(self, lst, stop_tokens=[13, 13940, 28832, 13]):
+        for i in range(len(lst) - len(stop_tokens) + 1):
+            if (lst[i:i + len(stop_tokens)] == stop_tokens).all():
+                return lst[:i + len(stop_tokens)]
+        return lst
+
     def postProcess(
         self,
         out_tokens,
-        input_seq_lens=None,
+        length=None,
         query_id_list=None,
         sample_index_list=None,
+        dataset_list=None,
     ):
         """Postprocesses output prediction"""
 
@@ -106,13 +123,14 @@
         """
         # Everything is padded to max_len (1024), so prune the input and parse
         # to numpy
-        output_seq = out_tokens[:, 1024:].cpu().numpy()
+        output_seq = out_tokens[:, length:].cpu().numpy()
         aux_seq = []
 
         assert len(query_id_list) == output_seq.shape[0]
         for i in range(len(output_seq)):
             aux = output_seq[i]
-            while len(output_seq[i]) <= 1:
-                aux = np.append(aux, self.tokenizer.eos_token_id)
+            aux = self.remove_trailing_twos(aux)
+            if (dataset_list[i] == "MBXP"):
+                aux = self.mbxp_stop(aux)
             aux_seq.append(aux)
 
         output_seq = np.stack(aux_seq)
diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index 2713f1415e..4b57201ae1 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-5.0.13
+5.0.14
diff --git a/loadgen/mlperf.conf b/loadgen/mlperf.conf
index 582381bbd9..57190c6c37 100644
--- a/loadgen/mlperf.conf
+++ b/loadgen/mlperf.conf
@@ -19,6 +19,7 @@ llama2-70b-interactive.*.performance_sample_count_override = 24576
 llama3_1-405b.*.performance_sample_count_override = 8313
 stable-diffusion-xl.*.performance_sample_count_override = 5000
 rgat.*.performance_sample_count_override = 788379
+pointpainting.*.performance_sample_count_override = 1024
 # set to 0 to let entire sample set to be performance sample
 3d-unet.*.performance_sample_count_override = 0
 
@@ -26,13 +27,14 @@ rgat.*.performance_sample_count_override = 788379
 *.*.qsl_rng_seed = 6023615788873153749
 *.*.sample_index_rng_seed = 15036839855038426416
 *.*.schedule_rng_seed = 9933818062894767841
-# Set seeds for TEST_05.
+
+# Set seeds for TEST_05 (not needed from v5.0 onwards)
 *.*.test05_qsl_rng_seed = 7975553102935885558
 *.*.test05_sample_index_rng_seed = 11403566307062068064
 *.*.test05_schedule_rng_seed = 15816800565822761601
-
 
 *.SingleStream.target_latency_percentile = 90
+pointpainting.SingleStream.target_latency_percentile = 99.9
 *.SingleStream.min_duration = 600000
 
 *.MultiStream.target_latency_percentile = 99
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index 264c9b3738..793c534714 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -271,7 +271,7 @@
             "mixtral-8x7b",
             "llama3.1-405b",
             "rgat",
-            # TODO: add automotive?
+            "pointpainting",
         ],
         "required-scenarios-datacenter": {
             "resnet": ["Server", "Offline"],
@@ -302,6 +302,7 @@
             "gptj-99": ["SingleStream", "Offline"],
             "gptj-99.9": ["SingleStream", "Offline"],
             "stable-diffusion-xl": ["SingleStream", "Offline"],
+            "pointpainting": ["SingleStream"],
         },
         "optional-scenarios-edge": {},
         "required-scenarios-datacenter-edge": {
@@ -323,6 +324,7 @@
             "mixtral-8x7b": ["Server", "Offline"],
             "llama3.1-405b": ["Server", "Offline"],
             "rgat": ["Offline"],
+            "pointpainting": ["SingleStream"],
         },
         "optional-scenarios-datacenter-edge": {},
         "accuracy-target": {
@@ -424,6 +426,7 @@
                 684.68 * 0.9,
             ),
             "rgat": ("acc", 0.7286 * 0.99),
+            "pointpainting": ("mAP", 0.5425 * 0.999),
         },
         "accuracy-upper-limit": {
            "stable-diffusion-xl": (
@@ -460,8 +463,8 @@
             "stable-diffusion-xl": 5000,
             "mixtral-8x7b": 15000,
             "llama3.1-405b": 8313,
-            "rgat": 788379
-
+            "rgat": 788379,
+            "pointpainting": 1024,
         },
         # model_mapping.json is expected in the root directory of the
         # submission folder for open submissions and so the below dictionary is
@@ -527,7 +530,8 @@
             },
             "mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
             "llama3.1-405b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
-            "rgat": {"SingleStream": 1024, "Offline": 1}
+            "rgat": {"SingleStream": 1024, "Offline": 1},
+            "pointpainting": {"SingleStream": 1024},
         },
     },
 }
@@ -731,7 +735,7 @@
 ACC_PATTERN = {
     "acc": r"^(?:\{\"accuracy|accuracy)[\": ]*=?\s*([\d\.]+).*",
     "AUC": r"^AUC=([\d\.]+).*",
-    "mAP": r"^mAP=([\d\.]+).*",
+    "mAP": r".*'(?:mAP|Total)':\s*([\d\.]+)",
     "bleu": r"^BLEU\:\s*([\d\.]+).*",
     "F1": r"^{[\"\']exact_match[\"\']\:\s*[\d\.]+,\s*[\"\']f1[\"\']\:\s*([\d\.]+)}",
     "WER": r"Word Error Rate\:.*, accuracy=([0-9\.]+)%",
diff --git a/vision/classification_and_detection/python/backend_pytorch_native.py b/vision/classification_and_detection/python/backend_pytorch_native.py
index db152f8e00..23a1a7f9ca 100755
--- a/vision/classification_and_detection/python/backend_pytorch_native.py
+++ b/vision/classification_and_detection/python/backend_pytorch_native.py
@@ -3,7 +3,7 @@
 """
 # pylint: disable=unused-argument,missing-docstring
 
-import torch  # currently supports pytorch1.0
+import torch
 import torchvision
 
 import backend
@@ -25,7 +25,7 @@ def image_format(self):
         return "NCHW"
 
     def load(self, model_path, inputs=None, outputs=None):
-        self.model = torch.load(model_path)
+        self.model = torch.load(model_path, weights_only=False)
         self.model.eval()
         # find inputs from the model if not passed in by config
         if inputs: