diff --git a/.ci/benchmark.py b/.ci/benchmark.py new file mode 100644 index 0000000..ccd0268 --- /dev/null +++ b/.ci/benchmark.py @@ -0,0 +1,15 @@ +import os +import sys +from src.benchmark.utils import read_metrics, to_markdown_table + +if __name__ == "__main__": + # Generate statistics report + statistics_path = sys.argv[1] + metrics = read_metrics(statistics_path, metric="accuracy") + html_table = to_markdown_table(metrics) + + # Write to workflow job summary + summary_path = os.environ["GITHUB_STEP_SUMMARY"] + with open(summary_path, "a") as f: + f.write("## Torchbenchmark statistics report\n") + f.write(html_table) diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml new file mode 100644 index 0000000..0449354 --- /dev/null +++ b/.github/workflows/_ascend_npu_benchmark.yml @@ -0,0 +1,148 @@ +name: '_ascend_npu_benchmark' + +on: + workflow_call: + inputs: + runner: + required: true + type: string + description: 'The runner selected to run on' + image: + required: true + type: string + description: 'The docker image which will be loaded' + device: + required: true + type: string + description: 'The device selected to run on' + artifact_name: + required: true + type: string + description: 'The torch_npu distribution artifact name' + +# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly +# declared as "shell: bash -el {0}" on steps that need to be properly activated. +# It's used to activate ascend-toolkit environment variables. +defaults: + run: + shell: bash -el {0} + +jobs: + test: + name: run benchmarks for torch_npu + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.image }} + volumes: + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + options: >- + --network host + --device ${{ inputs.device }} + --device /dev/davinci_manager + --device /dev/devmm_svm + --device /dev/hisi_hdc + env: + HTTP_PROXY: http://127.0.0.1:10809 + HTTPS_PROXY: http://127.0.0.1:10809 + ALL_PROXY: socks5://127.0.0.1:10808 + SOCKS_PROXY: socks5://127.0.0.1:10808 + steps: + - name: Show NPU info + run: | + npu-smi info + + - name: Config mirrors + run: | + sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list + pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + + - name: Install system dependencies + run: | + apt update + apt install --no-install-recommends -y \ + git gcc g++ make cmake ninja-build curl \ + libgl1 libglib2.0-0 libsndfile1 + + - name: Checkout + uses: actions/checkout@v4 + + # TODO(shink): Update once PR merged + # https://github.com/pytorch/benchmark/pull/2550 + - name: Checkout benchmark + uses: actions/checkout@v4 + with: + repository: shink/benchmark + ref: feat/test_bench/continue_on_error + path: benchmark + + - name: Download ${{ inputs.artifact_name }} + uses: actions/download-artifact@v4 + with: + name: ${{ inputs.artifact_name }} + path: ascend_npu + + - name: Install torch_npu + working-directory: ascend_npu + run: | + pip install ${{ inputs.artifact_name }} + + - name: Install torch_npu dependencies + working-directory: ascend_npu + run: | + curl -fsSL -O https://raw.githubusercontent.com/Ascend/pytorch/refs/heads/master/requirements.txt + pip install -r requirements.txt + + - name: Install benchmark dependencies + run: | + pip install -r benchmark/requirements.txt --constraint ascend_npu/requirements.txt "numpy==1.*" + python benchmark/install.py --userbenchmark test_bench --continue_on_fail + + - name: Install project dependencies + run: | + pip install -r requirements.txt + + - name: Show environment info + run: | + npu_is_available=$(python -c "import torch; print(torch.npu.is_available())") + npu_count=$(python -c "import torch; print(torch.npu.device_count())") + echo "NPU is available: ${npu_is_available}" + echo "NPU count: ${npu_count}" + pip list | grep -E 'torch|numpy' + + - name: Run benchmarks + working-directory: benchmark + run: | + python run_benchmark.py test_bench --accuracy --device npu --test eval \ + --output ascend_npu_benchmark.json + + - name: Upload the output file + id: upload-output + uses: actions/upload-artifact@v4 + with: + name: ascend_npu_benchmark.json + path: benchmark/ascend_npu_benchmark.json + if-no-files-found: error + retention-days: 1 + overwrite: true + + - name: Write to workflow job summary + id: report + run: | + set -x + realpath benchmark/ascend_npu_benchmark.json + ls benchmark + cat benchmark/ascend_npu_benchmark.json + + output_path=$(realpath benchmark/ascend_npu_benchmark.json) + python .ci/benchmark.py ${output_path} + + # TODO(shink) + - name: Update README.md + if: ${{ github.event_name == 'push' }} + run: | + echo "${{ github.event_name }}" + echo "${{ github.event_name == 'push' }}" diff --git a/.github/workflows/_ascend_npu_build.yml b/.github/workflows/_ascend_npu_build.yml index dfcfc5e..249fbc2 100644 --- a/.github/workflows/_ascend_npu_build.yml +++ b/.github/workflows/_ascend_npu_build.yml @@ -29,6 +29,13 @@ jobs: runs-on: ${{ inputs.runner }} container: image: ${{ inputs.image }} + options: >- + --network host + env: + HTTP_PROXY: http://127.0.0.1:10809 + HTTPS_PROXY: http://127.0.0.1:10809 + ALL_PROXY: socks5://127.0.0.1:10808 + SOCKS_PROXY: socks5://127.0.0.1:10808 outputs: dist_name: ${{ steps.list-dist.outputs.dist_name }} steps: diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_test.yml index 37932ce..fcfcac8 100644 --- a/.github/workflows/_ascend_npu_test.yml +++ b/.github/workflows/_ascend_npu_test.yml @@ -29,7 +29,7 @@ defaults: jobs: test: - name: test torch_npu in ${{ inputs.image }} with ${{ inputs.device }} + name: test torch_npu runs-on: ${{ inputs.runner }} container: image: ${{ inputs.image }} @@ -45,6 +45,11 @@ jobs: --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc + env: + HTTP_PROXY: http://127.0.0.1:10809 + HTTPS_PROXY: http://127.0.0.1:10809 + ALL_PROXY: socks5://127.0.0.1:10808 + SOCKS_PROXY: socks5://127.0.0.1:10808 steps: - name: Show NPU info run: | @@ -67,7 +72,6 @@ jobs: # repository: Ascend/pytorch repository: shink/torchnpu ref: feat/autoload - submodules: recursive path: torch_npu - name: Download distribution artifact diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml index f0e226b..5d60ee9 100644 --- a/.github/workflows/ascend_npu_test.yml +++ b/.github/workflows/ascend_npu_test.yml @@ -48,7 +48,7 @@ on: - /dev/davinci6 - /dev/davinci7 - /dev/davinci8 - default: '/dev/davinci6' + default: '/dev/davinci5' description: 'The device selected to run on' # Only cancel the previous runs when triggered by a pull request @@ -70,7 +70,7 @@ jobs: run: | set -e echo "runner=${{ github.event.inputs.runner || 'self-hosted' }}" >> $GITHUB_OUTPUT - echo "device=${{ github.event.inputs.device || '/dev/davinci6' }}" >> $GITHUB_OUTPUT + echo "device=${{ github.event.inputs.device || '/dev/davinci5' }}" >> $GITHUB_OUTPUT echo "image=${{ github.event.inputs.image || 'ascendai/cann:latest' }}" >> $GITHUB_OUTPUT build: @@ -93,3 +93,16 @@ jobs: image: ${{ needs.prepare.outputs.image }} device: ${{ needs.prepare.outputs.device }} artifact_name: ${{ needs.build.outputs.artifact_name }} + + benchmark: + name: Run benchmarks + needs: + - prepare + - build + - test + uses: ./.github/workflows/_ascend_npu_benchmark.yml + with: + runner: ${{ needs.prepare.outputs.runner }} + image: ${{ needs.prepare.outputs.image }} + device: ${{ needs.prepare.outputs.device }} + artifact_name: ${{ needs.build.outputs.artifact_name }} diff --git a/.gitignore b/.gitignore index 485dee6..2e54bfb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ .idea +.benchmarks +__pycache__ diff --git a/README.md b/README.md index 73a9037..f30065e 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,113 @@ across various devices by running comprehensive GitHub workflows. ## Accelerator Integration Test Results -| [torch_npu][1] | -|----------------------------------| -| [![Ascend NPU Test Suite][2]][3] | + + +| | [torch_npu][1] | +|---------------------------------|----------------| +| simple_gpt | ❌ | +| detectron2_fasterrcnn_r_50_dc5 | ❌ | +| LearningToPaint | ✅ | +| hf_GPT2_large | ✅ | +| dcgan | ✅ | +| nanogpt | ✅ | +| fastNLP_Bert | ✅ | +| moondream | ❌ | +| mobilenet_v2_quantized_qat | ❌ | +| functorch_dp_cifar10 | ✅ | +| simple_gpt_tp_manual | ❌ | +| speech_transformer | ✅ | +| yolov3 | ✅ | +| resnet50_quantized_qat | ❌ | +| sam_fast | ❌ | +| alexnet | ✅ | +| timm_efficientnet | ✅ | +| pyhpc_isoneutral_mixing | ✅ | +| basic_gnn_edgecnn | ✅ | +| nvidia_deeprecommender | ❌ | +| opacus_cifar10 | ✅ | +| dlrm | ✅ | +| hf_Bert | ✅ | +| hf_T5_generate | ✅ | +| resnet50 | ✅ | +| hf_BigBird | ✅ | +| resnext50_32x4d | ✅ | +| pyhpc_turbulent_kinetic_energy | ✅ | +| llama | ✅ | +| detectron2_maskrcnn_r_50_c4 | ❌ | +| Super_SloMo | ✅ | +| moco | ❌ | +| stable_diffusion_unet | ❌ | +| microbench_unbacked_tolist_sum | ✅ | +| detectron2_maskrcnn_r_101_c4 | ❌ | +| hf_distil_whisper | ✅ | +| mnasnet1_0 | ✅ | +| detectron2_fasterrcnn_r_50_fpn | ❌ | +| timm_resnest | ✅ | +| hf_GPT2 | ✅ | +| squeezenet1_1 | ✅ | +| basic_gnn_gin | ✅ | +| hf_clip | ✅ | +| mobilenet_v2 | ✅ | +| drq | ✅ | +| hf_Roberta_base | ✅ | +| detectron2_maskrcnn_r_50_fpn | ❌ | +| timm_nfnet | ✅ | +| timm_vovnet | ✅ | +| doctr_det_predictor | ✅ | +| sam | ✅ | +| hf_T5_large | ✅ | +| mobilenet_v3_large | ✅ | +| detectron2_fcos_r_50_fpn | ❌ | +| soft_actor_critic | ✅ | +| llava | ❌ | +| timm_regnet | ✅ | +| functorch_maml_omniglot | ✅ | +| detectron2_fasterrcnn_r_101_c4 | ❌ | +| hf_DistilBert | ✅ | +| tts_angular | ✅ | +| detectron2_maskrcnn | ❌ | +| basic_gnn_sage | ✅ | +| tacotron2 | ❌ | +| detectron2_maskrcnn_r_101_fpn | ❌ | +| lennard_jones | ✅ | +| pytorch_unet | ✅ | +| vgg16 | ✅ | +| BERT_pytorch | ✅ | +| timm_efficientdet | ❌ | +| pyhpc_equation_of_state | ✅ | +| maml | ✅ | +| detectron2_fasterrcnn_r_50_c4 | ❌ | +| resnet152 | ✅ | +| phlippe_densenet | ✅ | +| maml_omniglot | ✅ | +| phlippe_resnet | ✅ | +| pytorch_CycleGAN_and_pix2pix | ✅ | +| hf_Whisper | ✅ | +| hf_T5 | ✅ | +| densenet121 | ✅ | +| cm3leon_generate | ✅ | +| detectron2_fasterrcnn_r_101_fpn | ❌ | +| hf_Bert_large | ✅ | +| stable_diffusion_text_encoder | ❌ | +| hf_Reformer | ❌ | +| detectron2_fasterrcnn_r_101_dc5 | ❌ | +| demucs | ✅ | +| pytorch_stargan | ✅ | +| hf_T5_base | ✅ | +| torch_multimodal_clip | ✅ | +| vision_maskrcnn | ❌ | +| timm_vision_transformer_large | ✅ | +| hf_Bart | ✅ | +| shufflenet_v2_x1_0 | ✅ | +| llama_v2_7b_16h | ❌ | +| basic_gnn_gcn | ✅ | +| resnet18 | ✅ | +| Background_Matting | ✅ | +| doctr_reco_predictor | ✅ | +| timm_vision_transformer | ✅ | +| hf_Albert | ✅ | +| hf_Longformer | ✅ | [1]: https://github.com/ascend/pytorch @@ -17,6 +121,8 @@ across various devices by running comprehensive GitHub workflows. [3]: https://github.com/cosdt/pytorch-integration-tests/actions/workflows/ascend_npu_test.yml + + ## Overview This repository contains workflows and scripts that automate the testing diff --git a/ascend_npu/README.md b/ascend_npu/README.md new file mode 100644 index 0000000..ba18d81 --- /dev/null +++ b/ascend_npu/README.md @@ -0,0 +1,2 @@ +# Huawei Ascend NPU + diff --git a/ascend_npu/metadata.json b/ascend_npu/metadata.json new file mode 100644 index 0000000..f41a975 --- /dev/null +++ b/ascend_npu/metadata.json @@ -0,0 +1,11 @@ +{ + "device": "npu", + "test": [ + "train", + "eval" + ], + "models": [ + "BERT_pytorch", + "hf_GPT2" + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a5d5159 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +tabulate diff --git a/src/benchmark/__init__.py b/src/benchmark/__init__.py new file mode 100644 index 0000000..16281fe --- /dev/null +++ b/src/benchmark/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/src/benchmark/utils.py b/src/benchmark/utils.py new file mode 100644 index 0000000..b50968a --- /dev/null +++ b/src/benchmark/utils.py @@ -0,0 +1,108 @@ +import ast +import dataclasses +import json +import pathlib +from typing import Dict, List, Optional + + +# Same as torchbenchmark.util.experiment.instantiator.TorchBenchModelConfig +# https://github.com/pytorch/benchmark/blob/main/torchbenchmark/util/experiment/instantiator.py#L26 +@dataclasses.dataclass +class TorchBenchModelConfig: + name: str + test: str + device: str + batch_size: Optional[int] + extra_args: List[str] + extra_env: Optional[Dict[str, str]] = None + output_dir: Optional[pathlib.Path] = None + + +@dataclasses.dataclass +class TorchBenchModelMetric: + key: TorchBenchModelConfig + value: str + + +def read_json(path: str) -> dict: + with open(path, 'r') as f: + data = json.load(f) + return data + + +def save_file(path: str, data) -> None: + with open(path, 'w') as file: + file.write(data) + + +def parse_to_dict(config_str: str): + """ + Parse a string (like 'key1=value1, key2=value2, ...') into a dict + """ + items = config_str.split(", ") + config = {} + + for item in items: + key, value = item.split("=", 1) + try: + config[key] = ast.literal_eval(value) + except (ValueError, SyntaxError): + config[key] = value + + return config + + +def read_metrics(path: str, *, metric=None) -> List[TorchBenchModelMetric]: + output = read_json(path) + metrics_data = output.get('metrics', {}) + + metrics = [] + for metric_key, metric_value in metrics_data.items(): + key_dict = parse_to_dict(metric_key) + if metric is None or metric == key_dict["metric"]: + config = TorchBenchModelConfig( + name=key_dict.get("model"), + test=key_dict.get("test"), + device=key_dict.get("device"), + batch_size=key_dict.get("batch_size"), + extra_args=key_dict.get("extra_args"), + extra_env=key_dict.get("extra_env"), + output_dir=key_dict.get("output_dir"), + ) + model_metric = TorchBenchModelMetric(config, metric_value) + metrics.append(model_metric) + return metrics + + +def generate_table_rows(metrics: List[TorchBenchModelMetric]): + models = list({metric.key.name for metric in metrics}) + devices = list({metric.key.device for metric in metrics}) + + def filter_result(metrics: List[TorchBenchModelMetric], *, model, device): + for metric in metrics: + if metric.key.name == model and metric.key.device == device: + return metric + + rows = [] + for model in models: + row = [model] + for device in devices: + metric = filter_result(metrics, model=model, device=device) + if metric is not None: + is_pass = metric.value == "pass" + cell = "✅" if is_pass else "❌" + else: + cell = "" + row.append(cell) + rows.append(row) + + headers = [""] + devices + return headers, rows + + +def to_markdown_table(metrics: List[TorchBenchModelMetric]): + from tabulate import tabulate + + headers, rows = generate_table_rows(metrics) + markdown_table = tabulate(rows, headers=headers, tablefmt="github") + return markdown_table diff --git a/test/benchmark/ascend_npu_benchmark.json b/test/benchmark/ascend_npu_benchmark.json new file mode 100644 index 0000000..2316f39 --- /dev/null +++ b/test/benchmark/ascend_npu_benchmark.json @@ -0,0 +1,11 @@ +{ + "name": "test_bench", + "environ": { + "pytorch_git_version": "dd2e6d61409aac22198ec771560a38adb0018ba2", + "pytorch_version": "2.6.0.dev20241120" + }, + "metrics": { + "model=BERT_pytorch, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=hf_GPT2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass" + } +} \ No newline at end of file diff --git a/test/benchmark/test_ascend_npu.py b/test/benchmark/test_ascend_npu.py new file mode 100644 index 0000000..f78e48d --- /dev/null +++ b/test/benchmark/test_ascend_npu.py @@ -0,0 +1,22 @@ +import unittest +import src.benchmark.utils as utils + +path = "ascend_npu_benchmark.json" + + +class TestBenchmark(unittest.TestCase): + def test_read_metrics(self): + metrics = utils.read_metrics(path, metric="accuracy") + self.assertTrue(len(metrics) == 2) + for metric in metrics: + self.assertEqual(metric.key.device, "npu") + self.assertEqual(metric.value, "pass") + + def test_to_markdown_table(self): + metrics = utils.read_metrics(path, metric="accuracy") + markdown_table = utils.to_markdown_table(metrics) + self.assertIsNotNone(markdown_table) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/gpt2_test.py b/test/gpt2_test.py deleted file mode 100644 index a35650d..0000000 --- a/test/gpt2_test.py +++ /dev/null @@ -1,147 +0,0 @@ -import os - -import torch -import torch_npu -from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer -from datasets import load_dataset -from transformers import DataCollatorForLanguageModeling - - -# 固定随机种子 -def set_seed(seed=42): - torch.manual_seed(seed) - if torch.npu.is_available(): - torch.npu.manual_seed_all(seed) - - -# 训练并比较 CPU 和 GPU 的训练损失 -def train_and_compare_gpt2(model_name): - set_seed() - - def train_on_device(use_cpu=False): - # 加载 GPT-2 模型和 tokenizer - model = GPT2LMHeadModel.from_pretrained(model_name) - tokenizer = GPT2Tokenizer.from_pretrained(model_name) - tokenizer.pad_token = tokenizer.eos_token # GPT-2 没有 pad_token,需要将 eos_token 作为 pad_token - - # 加载 wikitext-2 数据集 - train_dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train', verification_mode="no_checks") - val_dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='validation', verification_mode="no_checks") - - def preprocess_function(examples): - return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=128) - - train_dataset = train_dataset.map(preprocess_function, batched=True) - val_dataset = val_dataset.map(preprocess_function, batched=True) - - train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask']) - val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask']) - - # 设置训练参数 - training_args = TrainingArguments( - output_dir='./results', - per_device_train_batch_size=4, - per_device_eval_batch_size=4, - num_train_epochs=1, - logging_dir='./logs', - logging_steps=10, - eval_strategy='epoch', - save_strategy='epoch', - report_to="none", - use_cpu=use_cpu - ) - - data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False) - - # 创建 Trainer - trainer = Trainer( - data_collator=data_collator, - model=model, - args=training_args, - train_dataset=train_dataset, - eval_dataset=val_dataset - ) - - # 训练模型 - trainer.train() - - # 评估模型 - metrics = trainer.evaluate() - - # 返回评估损失 - return metrics['eval_loss'] - - # 在 GPU 上训练(如果有 GPU) - if torch.npu.is_available(): - print(f"Training on NPU") - gpu_loss = train_on_device(False) - print(f"GPU Training Loss: {gpu_loss:.4f}") - else: - gpu_loss = None - print("No GPU available for training.") - - # 在 CPU 上训练 - if os.getenv("IS_CI"): - # Skip training when running in CI because it's too slow - cpu_loss = 3.0 - else: - print(f"Training on CPU") - cpu_loss = train_on_device(True) - - print(f"CPU Training Loss: {cpu_loss:.4f}") - - return cpu_loss, gpu_loss - - -# 推理并比较 CPU 和 GPU 的推理损失 -def infer_and_compare_gpt2(model_name): - set_seed() - - def infer_on_device(device: torch.device): - # 加载 GPT-2 模型和 tokenizer - model = GPT2LMHeadModel.from_pretrained(model_name).to(device) - tokenizer = GPT2Tokenizer.from_pretrained(model_name) - - # 设置 pad_token 为 eos_token - tokenizer.pad_token = tokenizer.eos_token - - # 推理测试句子 - test_sentence = "The quick brown fox jumps over the lazy dog." - inputs = tokenizer(test_sentence, return_tensors="pt", padding=True, truncation=True).to(device) - - with torch.no_grad(): - outputs = model(**inputs, labels=inputs["input_ids"]) - - # 计算损失 - loss = outputs.loss.item() - return loss - - # 在 GPU 上推理(如果有 GPU) - if torch.npu.is_available(): - gpu_device = torch.device('npu') - gpu_loss = infer_on_device(gpu_device) - print(f"GPU Inference Loss: {gpu_loss:.4f}") - else: - gpu_loss = None - print("No GPU available for inference.") - - # 在 CPU 上推理 - cpu_device = torch.device('cpu') - cpu_loss = infer_on_device(cpu_device) - - print(f"CPU Inference Loss: {cpu_loss:.4f}") - - return cpu_loss, gpu_loss - - -# 主函数 -if __name__ == "__main__": - model_name = "gpt2" - - # 训练并比较训练损失 - print("Comparing Training Loss:") - cpu_train_loss, gpu_train_loss = train_and_compare_gpt2(model_name) - - # 推理并比较推理损失 - print("\nComparing Inference Loss:") - cpu_infer_loss, gpu_infer_loss = infer_and_compare_gpt2(model_name) diff --git a/test/requirements.txt b/test/requirements.txt deleted file mode 100644 index 3f50646..0000000 --- a/test/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -transformers==4.44.2 -datasets==2.21.0